This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch patch_for_rc4 in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit c60448e0443f1bb40f84f378750f17ca65bc5541 Author: Jon Malkin <[email protected]> AuthorDate: Tue Jun 2 13:54:50 2020 -0700 cherry-pick #151: no checking of seed hash for empty compact sketches --- theta/include/theta_a_not_b_impl.hpp | 3 +-- theta/include/theta_intersection_impl.hpp | 2 +- theta/include/theta_sketch_impl.hpp | 14 ++++++++++---- theta/test/theta_compact_empty_from_java.sk | Bin 8 -> 8 bytes 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/theta/include/theta_a_not_b_impl.hpp b/theta/include/theta_a_not_b_impl.hpp index cc171ce..f080903 100644 --- a/theta/include/theta_a_not_b_impl.hpp +++ b/theta/include/theta_a_not_b_impl.hpp @@ -37,10 +37,9 @@ seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed)) template<typename A> compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch_alloc<A>& a, const theta_sketch_alloc<A>& b, bool ordered) const { - if (a.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered); + if (a.is_empty() || a.get_num_retained() == 0 || b.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered); if (a.get_seed_hash() != seed_hash_) throw std::invalid_argument("A seed hash mismatch"); if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed hash mismatch"); - if (a.get_num_retained() == 0 || b.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered); const uint64_t theta = std::min(a.get_theta64(), b.get_theta64()); vector_u64<A> keys; diff --git a/theta/include/theta_intersection_impl.hpp b/theta/include/theta_intersection_impl.hpp index 79fea4e..6be6757 100644 --- a/theta/include/theta_intersection_impl.hpp +++ b/theta/include/theta_intersection_impl.hpp @@ -44,7 +44,7 @@ seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed)) template<typename A> void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) { if (is_empty_) return; - if (sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch"); + if (!sketch.is_empty() && sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch"); is_empty_ |= sketch.is_empty(); theta_ = std::min(theta_, sketch.get_theta64()); if (is_valid_ && num_keys_ == 0) return; diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp index 417dfa1..0514884 100644 --- a/theta/include/theta_sketch_impl.hpp +++ b/theta/include/theta_sketch_impl.hpp @@ -101,9 +101,9 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(st is.read((char*)&seed_hash, sizeof(seed_hash)); check_serial_version(serial_version, SERIAL_VERSION); - check_seed_hash(seed_hash, get_seed_hash(seed)); if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) { + check_seed_hash(seed_hash, get_seed_hash(seed)); typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6); typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU; return unique_ptr( @@ -114,6 +114,8 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(st } ); } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) { + const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); + if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed)); typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC; return unique_ptr( static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(compact_theta_sketch_alloc<A>::internal_deserialize(is, preamble_longs, flags_byte, seed_hash))), @@ -146,9 +148,9 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(co ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash)); check_serial_version(serial_version, SERIAL_VERSION); - check_seed_hash(seed_hash, get_seed_hash(seed)); if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) { + check_seed_hash(seed_hash, get_seed_hash(seed)); typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6); typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU; return unique_ptr( @@ -161,6 +163,8 @@ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(co } ); } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) { + const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); + if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed)); typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC; return unique_ptr( static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>( @@ -753,7 +757,8 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::is is.read((char*)&seed_hash, sizeof(seed_hash)); theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE); theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION); - theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed)); + const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); + if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed)); return internal_deserialize(is, preamble_longs, flags_byte, seed_hash); } @@ -801,7 +806,8 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash)); theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE); theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION); - theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed)); + const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); + if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed)); return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash); } diff --git a/theta/test/theta_compact_empty_from_java.sk b/theta/test/theta_compact_empty_from_java.sk index 44730d3..f6c647f 100644 Binary files a/theta/test/theta_compact_empty_from_java.sk and b/theta/test/theta_compact_empty_from_java.sk differ --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
