jmalkin commented on code in PR #428:
URL: https://github.com/apache/datasketches-cpp/pull/428#discussion_r1536545592
##########
tdigest/test/tdigest_test.cpp:
##########
@@ -54,8 +54,9 @@ TEST_CASE("one value", "[tdigest]") {
TEST_CASE("many values", "[tdigest]") {
const size_t n = 10000;
- tdigest_double td(100);
+ tdigest_double td;
for (size_t i = 0; i < n; ++i) td.update(i);
+// std::cout << td.to_string(true);
Review Comment:
not a blocker but some extra comments left in it seems
##########
tdigest/include/tdigest_impl.hpp:
##########
@@ -282,68 +275,76 @@ double tdigest<T, A>::weighted_average(double x1, double
w1, double x2, double w
}
template<typename T, typename A>
-void tdigest<T, A>::serialize(std::ostream& os) const {
- const_cast<tdigest*>(this)->merge_buffered(); // side effect
+void tdigest<T, A>::serialize(std::ostream& os, bool with_buffer) const {
+ if (!with_buffer) const_cast<tdigest*>(this)->compress(); // side effect
write(os, is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE :
PREAMBLE_LONGS_MULTIPLE);
write(os, SERIAL_VERSION);
write(os, SKETCH_TYPE);
write(os, k_);
const uint8_t flags_byte(
- (is_empty() ? 1 << flags::IS_EMPTY : 0) |
- (is_single_value() ? 1 << flags::IS_SINGLE_VALUE : 0) |
- (reverse_merge_ ? 1 << flags::REVERSE_MERGE : 0)
+ (is_empty() ? 1 << flags::IS_EMPTY : 0)
+ | (is_single_value() ? 1 << flags::IS_SINGLE_VALUE : 0)
+ | (reverse_merge_ ? 1 << flags::REVERSE_MERGE : 0)
);
write(os, flags_byte);
write<uint16_t>(os, 0); // unused
-
if (is_empty()) return;
-
if (is_single_value()) {
write(os, min_);
return;
}
-
write(os, static_cast<uint32_t>(centroids_.size()));
- write<uint32_t>(os, 0); // unused
-
+ write(os, static_cast<uint32_t>(buffer_.size()));
write(os, min_);
write(os, max_);
- write(os, centroids_.data(), centroids_.size() * sizeof(centroid));
+ if (centroids_.size() > 0) write(os, centroids_.data(), centroids_.size() *
sizeof(centroid));
+ if (buffer_.size() > 0) write(os, buffer_.data(), buffer_.size() *
sizeof(T));
}
template<typename T, typename A>
-auto tdigest<T, A>::serialize(unsigned header_size_bytes) const ->
vector_bytes {
- const_cast<tdigest*>(this)->merge_buffered(); // side effect
- const uint8_t preamble_longs = is_empty() || is_single_value() ?
PREAMBLE_LONGS_EMPTY_OR_SINGLE : PREAMBLE_LONGS_MULTIPLE;
- const size_t size_bytes = preamble_longs * sizeof(uint64_t) +
- (is_empty() ? 0 : (is_single_value() ? sizeof(T) : sizeof(T) * 2 +
sizeof(centroid) * centroids_.size()));
- vector_bytes bytes(size_bytes, 0, allocator_);
- uint8_t* ptr = bytes.data() + header_size_bytes;
+uint8_t tdigest<T, A>::get_preamble_longs() const {
+ return is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE :
PREAMBLE_LONGS_MULTIPLE;
+}
- *ptr++ = preamble_longs;
+template<typename T, typename A>
+size_t tdigest<T, A>::get_serialized_size_bytes(bool with_buffer) const {
+ if (!with_buffer) const_cast<tdigest*>(this)->compress(); // side effect
+ size_t size_bytes = get_preamble_longs() * sizeof(uint64_t);
+ if (is_empty()) return size_bytes;
+ if (is_single_value()) return size_bytes + sizeof(T);
+ size_bytes += sizeof(T) * 2 // min and max
+ + sizeof(centroid) * centroids_.size();
+ if (with_buffer) size_bytes += sizeof(T) * buffer_.size(); // count is a
part of preamble
+ return size_bytes;
+}
+
+template<typename T, typename A>
+auto tdigest<T, A>::serialize(unsigned header_size_bytes, bool with_buffer)
const -> vector_bytes {
+ if (!with_buffer) const_cast<tdigest*>(this)->compress(); // side effect
+ vector_bytes bytes(get_serialized_size_bytes(with_buffer), 0,
buffer_.get_allocator());
+ uint8_t* ptr = bytes.data() + header_size_bytes;
+ *ptr++ = get_preamble_longs();
Review Comment:
One of the more aggressive warning flags complains about *ptr++ as possibly
incrementing a null ptr
##########
tdigest/include/tdigest_impl.hpp:
##########
@@ -282,68 +275,76 @@ double tdigest<T, A>::weighted_average(double x1, double
w1, double x2, double w
}
template<typename T, typename A>
-void tdigest<T, A>::serialize(std::ostream& os) const {
- const_cast<tdigest*>(this)->merge_buffered(); // side effect
+void tdigest<T, A>::serialize(std::ostream& os, bool with_buffer) const {
+ if (!with_buffer) const_cast<tdigest*>(this)->compress(); // side effect
write(os, is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE :
PREAMBLE_LONGS_MULTIPLE);
Review Comment:
can call the newly added `get_preamble_longs()` here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]