This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch fix_theta_compressed_stream
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 6be246d26d50dba183c025feafe5014675632c42
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Wed Jul 24 17:35:53 2024 -0700

    fixed compressed theta stream serialization
---
 theta/include/theta_sketch_impl.hpp            |  1 +
 theta/test/theta_sketch_serialize_for_java.cpp |  2 +-
 theta/test/theta_sketch_test.cpp               | 41 ++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp
index c31d0ba..b6a5d7e 100644
--- a/theta/include/theta_sketch_impl.hpp
+++ b/theta/include/theta_sketch_impl.hpp
@@ -506,6 +506,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
       previous = entries_[i];
       offset = pack_bits(delta, entry_bits, ptr, offset);
     }
+    if (offset > 0) ++ptr;
     write(os, buffer.data(), ptr - buffer.data());
   }
 }
diff --git a/theta/test/theta_sketch_serialize_for_java.cpp b/theta/test/theta_sketch_serialize_for_java.cpp
index 487553f..876551f 100644
--- a/theta/test/theta_sketch_serialize_for_java.cpp
+++ b/theta/test/theta_sketch_serialize_for_java.cpp
@@ -43,7 +43,7 @@ TEST_CASE("theta sketch generate compressed", "[serialize_for_java]") {
     REQUIRE_FALSE(sketch.is_empty());
     REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
     std::ofstream os("theta_compressed_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
-    sketch.compact().serialize(os);
+    sketch.compact().serialize_compressed(os);
   }
 }
 
diff --git a/theta/test/theta_sketch_test.cpp b/theta/test/theta_sketch_test.cpp
index 0721898..97c4f14 100644
--- a/theta/test/theta_sketch_test.cpp
+++ b/theta/test/theta_sketch_test.cpp
@@ -517,6 +517,47 @@ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]"
   }
 }
 
+TEST_CASE("theta sketch: serialize deserialize small compressed", "[theta_sketch]") {
+  auto update_sketch = update_theta_sketch::builder().build();
+  for (int i = 0; i < 10; i++) update_sketch.update(i);
+  auto compact_sketch = update_sketch.compact();
+
+  auto bytes = compact_sketch.serialize_compressed();
+  REQUIRE(bytes.size() == compact_sketch.get_serialized_size_bytes(true));
+  { // deserialize bytes
+    auto deserialized_sketch = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
+    REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
+    REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
+    auto iter = deserialized_sketch.begin();
+    for (const auto key: compact_sketch) {
+      REQUIRE(*iter == key);
+      ++iter;
+    }
+  }
+  { // wrap bytes
+    auto wrapped_sketch = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
+    REQUIRE(wrapped_sketch.get_num_retained() == compact_sketch.get_num_retained());
+    REQUIRE(wrapped_sketch.get_theta() == compact_sketch.get_theta());
+    auto iter = wrapped_sketch.begin();
+    for (const auto key: compact_sketch) {
+      REQUIRE(*iter == key);
+      ++iter;
+    }
+  }
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  compact_sketch.serialize_compressed(s);
+  REQUIRE(static_cast<size_t>(s.tellp()) == compact_sketch.get_serialized_size_bytes(true));
+  auto deserialized_sketch = compact_theta_sketch::deserialize(s);
+  REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
+  REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
+  auto iter = deserialized_sketch.begin();
+  for (const auto key: compact_sketch) {
+    REQUIRE(*iter == key);
+    ++iter;
+  }
+}
+
 TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
   auto update_sketch = update_theta_sketch::builder().build();
   for (int i = 0; i < 10000; i++) update_sketch.update(i);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
