c-dickens commented on code in PR #325:
URL: https://github.com/apache/datasketches-cpp/pull/325#discussion_r1073963195
##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+ /*
+ * C++ implementation of the CountMin sketch data structure of Cormode and
Muthukrishnan.
+ * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+ * @author Charlie Dickens
+ */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+ static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+ uint64_t num_hashes, num_buckets, seed, sketch_length ;
+ T total_weight = 0;
+ std::vector<uint64_t> hash_seeds ;
+
+ /**
+ * Creates an instance of the sketch given parameters num_hashes, num_buckets
and hash seed, `seed`.
+ * @param num_hashes : number of hash functions in the sketch. Equivalently
the number of rows in the array
+ * @param num_buckets : number of buckets that hash functions map into.
Equivalently the number of columns in the array
+ * @param seed for hash function
+ *
+ * The template type T is the type of the vector that contains the weights,
not the objects inserted into the sketch.
+ * The items inserted into the sketch can be arbitrary type, so long as they
are hashable via murmurhash.
+ * Only update and estimate methods are added for uint64_t and string types.
+ */
+ count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed =
DEFAULT_SEED) ;
+
+ std::vector<T> sketch ; // the array stored by the sketch
+
+ /**
+ * @return configured num_hashes of this sketch
+ */
+ uint64_t get_num_hashes() ;
+
+ /**
+ * @return configured num_buckets of this sketch
+ */
+ uint64_t get_num_buckets() ;
+
+ /**
+ * @return configured seed of this sketch
+ */
+ uint64_t get_seed() ;
+
+ /**
+ * @return vector of the sketch configuration: {num_hashes, num_buckets, seed}
+ */
+ std::vector<uint64_t> get_config() ; // Sketch parameter configuration --
needed for merging.
+
+ /**
+ * @return epsilon : double
+ * The maximum permissible error for any frequency estimate query.
+ * epsilon = e / num_buckets (equivalently, num_buckets = ceil(e / epsilon))
+ */
+ double get_relative_error() ;
+
+ /**
+ * @return total_weight : typename T
+ * The total weight currently inserted into the stream.
+ */
+ T get_total_weight() ;
+
+ /**
+ * @return vector of the sketch data structure
+ * Required for merging.
+ */
+ std::vector<T> get_sketch() ;
Review Comment:
I have removed this function and replaced it with an iterator.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]