AlexanderSaydakov commented on code in PR #428:
URL: https://github.com/apache/datasketches-cpp/pull/428#discussion_r1536547134
##########
tdigest/include/tdigest_impl.hpp:
##########
@@ -282,68 +275,76 @@ double tdigest<T, A>::weighted_average(double x1, double
w1, double x2, double w
}
template<typename T, typename A>
-void tdigest<T, A>::serialize(std::ostream& os) const {
- const_cast<tdigest*>(this)->merge_buffered(); // side effect
+void tdigest<T, A>::serialize(std::ostream& os, bool with_buffer) const {
+ if (!with_buffer) const_cast<tdigest*>(this)->compress(); // side effect
write(os, is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE :
PREAMBLE_LONGS_MULTIPLE);
write(os, SERIAL_VERSION);
write(os, SKETCH_TYPE);
write(os, k_);
const uint8_t flags_byte(
- (is_empty() ? 1 << flags::IS_EMPTY : 0) |
- (is_single_value() ? 1 << flags::IS_SINGLE_VALUE : 0) |
- (reverse_merge_ ? 1 << flags::REVERSE_MERGE : 0)
+ (is_empty() ? 1 << flags::IS_EMPTY : 0)
+ | (is_single_value() ? 1 << flags::IS_SINGLE_VALUE : 0)
+ | (reverse_merge_ ? 1 << flags::REVERSE_MERGE : 0)
);
write(os, flags_byte);
write<uint16_t>(os, 0); // unused
-
if (is_empty()) return;
-
if (is_single_value()) {
write(os, min_);
return;
}
-
write(os, static_cast<uint32_t>(centroids_.size()));
- write<uint32_t>(os, 0); // unused
-
+ write(os, static_cast<uint32_t>(buffer_.size()));
write(os, min_);
write(os, max_);
- write(os, centroids_.data(), centroids_.size() * sizeof(centroid));
+ if (centroids_.size() > 0) write(os, centroids_.data(), centroids_.size() *
sizeof(centroid));
+ if (buffer_.size() > 0) write(os, buffer_.data(), buffer_.size() *
sizeof(T));
}
template<typename T, typename A>
-auto tdigest<T, A>::serialize(unsigned header_size_bytes) const ->
vector_bytes {
- const_cast<tdigest*>(this)->merge_buffered(); // side effect
- const uint8_t preamble_longs = is_empty() || is_single_value() ?
PREAMBLE_LONGS_EMPTY_OR_SINGLE : PREAMBLE_LONGS_MULTIPLE;
- const size_t size_bytes = preamble_longs * sizeof(uint64_t) +
- (is_empty() ? 0 : (is_single_value() ? sizeof(T) : sizeof(T) * 2 +
sizeof(centroid) * centroids_.size()));
- vector_bytes bytes(size_bytes, 0, allocator_);
- uint8_t* ptr = bytes.data() + header_size_bytes;
+uint8_t tdigest<T, A>::get_preamble_longs() const {
+ return is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE :
PREAMBLE_LONGS_MULTIPLE;
+}
- *ptr++ = preamble_longs;
+template<typename T, typename A>
+size_t tdigest<T, A>::get_serialized_size_bytes(bool with_buffer) const {
+ if (!with_buffer) const_cast<tdigest*>(this)->compress(); // side effect
+ size_t size_bytes = get_preamble_longs() * sizeof(uint64_t);
+ if (is_empty()) return size_bytes;
+ if (is_single_value()) return size_bytes + sizeof(T);
+ size_bytes += sizeof(T) * 2 // min and max
+ + sizeof(centroid) * centroids_.size();
+ if (with_buffer) size_bytes += sizeof(T) * buffer_.size(); // count is a
part of preamble
+ return size_bytes;
+}
+
+template<typename T, typename A>
+auto tdigest<T, A>::serialize(unsigned header_size_bytes, bool with_buffer)
const -> vector_bytes {
+ if (!with_buffer) const_cast<tdigest*>(this)->compress(); // side effect
+ vector_bytes bytes(get_serialized_size_bytes(with_buffer), 0,
buffer_.get_allocator());
+ uint8_t* ptr = bytes.data() + header_size_bytes;
+ *ptr++ = get_preamble_longs();
Review Comment:
I don't see how this would happen.
I thought about encapsulating this keeping track of the offset. or, perhaps,
using something like std::strstream
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]