jmalkin commented on code in PR #379:
URL: https://github.com/apache/datasketches-cpp/pull/379#discussion_r1248189466
##########
count/include/count_min.hpp:
##########
@@ -25,26 +25,27 @@
namespace datasketches {
- /*
- * C++ implementation of the CountMin sketch data structure of Cormode and
Muthukrishnan.
- * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
- * The template type W is the type of the vector that contains the weights
of the objects inserted into the sketch,
- * not the type of the input items themselves.
- * @author Charlie Dickens
- */
-
+/**
+ * C++ implementation of the CountMin sketch data structure of Cormode and
Muthukrishnan.
+ * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+ * The template type W is the type of the vector that contains the weights of
the objects inserted into the sketch,
+ * not the type of the input items themselves.
+ * @author Charlie Dickens
+ */
template <typename W,
typename Allocator = std::allocator<W>>
class count_min_sketch{
static_assert(std::is_arithmetic<W>::value, "Arithmetic type expected");
public:
using allocator_type = Allocator;
+ using const_iterator = typename std::vector<W, Allocator>::const_iterator;
/**
* Creates an instance of the sketch given parameters _num_hashes,
_num_buckets and hash seed, `seed`.
* @param num_hashes : number of hash functions in the sketch. Equivalently
the number of rows in the array
* @param num_buckets : number of buckets that hash functions map into.
Equivalently the number of columns in the array
* @param seed for hash function
+ * @param allocator to use by this instance
Review Comment:
for/with rather than by?
##########
common/include/serde.hpp:
##########
@@ -30,23 +30,56 @@
namespace datasketches {
-// serialize and deserialize
+/// Interface for serializing and deserializing items
template<typename T, typename Enable = void> struct serde {
- // stream serialization
+ /**
+ * Stream serialization
+ * @param os output stream
+ * @param items pointer to array of items
+ * @param num number of items
+ */
void serialize(std::ostream& os, const T* items, unsigned num) const;
+
+ /**
+ * Stream deserialization
+ * @param is input stream
+ * @param items pointer to array of items
Review Comment:
I think we should specify allocation but not initialized here as opposed to
inline with the declaration
##########
tuple/include/tuple_sketch.hpp:
##########
@@ -522,8 +558,7 @@ class compact_tuple_sketch: public tuple_sketch<Summary,
Allocator> {
};
-// builder
-
+// base builder
Review Comment:
intentional to have only `//` rather than `///`?
##########
req/include/req_sketch.hpp:
##########
@@ -39,26 +81,55 @@ class req_sketch {
using comparator = Comparator;
using Compactor = req_compactor<T, Comparator, Allocator>;
using AllocCompactor = typename std::allocator_traits<Allocator>::template
rebind_alloc<Compactor>;
+ using vector_double = typename quantiles_sorted_view<T, Comparator,
Allocator>::vector_double;
+
+ /**
+ * Quantile return type.
+ * This is to return quantiles either by value (for arithmetic types) or by
const reference (for all other types)
+ */
+ using quantile_return_type = typename quantiles_sorted_view<T, Comparator,
Allocator>::quantile_return_type;
/**
* Constructor
* @param k Controls the size and error of the sketch. It must be even and
in the range [4, 1024], inclusive.
* Value of 12 roughly corresponds to 1% relative error guarantee at 95%
confidence.
* @param hra if true, the default, the high ranks are prioritized for better
* accuracy. Otherwise the low ranks are prioritized for better accuracy.
- * @param comparator to use by this instance
- * @param allocator to use by this instance
+ * @param comparator instance to use by this sketch instance
Review Comment:
`for use by` or `to use with`
and in the next row
##########
count/include/count_min.hpp:
##########
@@ -111,7 +114,7 @@ class count_min_sketch{
/**
* Specific get_estimate function for int64_t type
* see generic get_estimate function
- * @param item : uint64_t type.
+ * @param item : int64_t type.
Review Comment:
remove the `:`
##########
theta/include/theta_sketch.hpp:
##########
@@ -106,6 +107,7 @@ class base_theta_sketch_alloc {
virtual void print_items(std::ostringstream& os) const = 0;
};
+/// Base class for the Theta Sketch, a generalization of the Kth Minimum Value
(KMV) sketch.
Review Comment:
Is it a generalization of or a type of KMV sketch? Maybe sidestep and
describe it as a type of KMV sketch for counting distinct items?
##########
common/include/quantiles_sorted_view.hpp:
##########
@@ -27,37 +27,129 @@
namespace datasketches {
+/**
+ * Sorted view for quantiles sketches (REQ, KLL and Quantiles)
+ */
template<
typename T,
typename Comparator, // strict weak ordering function (see C++ named
requirements: Compare)
typename Allocator
>
class quantiles_sorted_view {
public:
+ /// Entry type
using Entry = typename std::conditional<std::is_arithmetic<T>::value,
std::pair<T, uint64_t>, std::pair<const T*, uint64_t>>::type;
using AllocEntry = typename std::allocator_traits<Allocator>::template
rebind_alloc<Entry>;
using Container = std::vector<Entry, AllocEntry>;
+ /// @private
quantiles_sorted_view(uint32_t num, const Comparator& comparator, const
Allocator& allocator);
+ /// @private
template<typename Iterator>
void add(Iterator begin, Iterator end, uint64_t weight);
+ /// @private
void convert_to_cummulative();
class const_iterator;
+
+ /**
+ * Iterator pointing to the first entry in the view.
+ * If the view is empty, the returned iterator must not be dereferenced or
incremented.
+ * @return iterator pointing to the first entry
+ */
const_iterator begin() const;
+
+ /**
+ * Iterator pointing to the past-the-end entry in the view.
+ * The past-the-end entry is the hypothetical entry that would follow the
last entry.
+ * It does not point to any entry, and must not be dereferenced or
incremented.
+ * @return iterator pointing to the past-the-end entry
+ */
const_iterator end() const;
+ /// @return size of the view
size_t size() const;
+ /**
+ * Returns an approximation to the normalized rank of the given item from 0
to 1, inclusive.
Review Comment:
I first read this as computing rank of an item from 0 to 1, which confused
me. I think it might be worth splitting it into 2 sentences (end after item) to
note that the result will be in [0,1] in a more detailed description.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]