This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch density_custom_kernel in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit fd62b1831356fd2b705f7c8de959fa54620e0de8 Author: AlexanderSaydakov <[email protected]> AuthorDate: Tue Dec 13 19:42:32 2022 -0800 added iterator and to_string --- density/include/density_sketch.hpp | 35 +++++++++ density/include/density_sketch_impl.hpp | 124 +++++++++++++++++++++++++++++++- density/test/density_sketch_test.cpp | 17 ++++- 3 files changed, 173 insertions(+), 3 deletions(-) diff --git a/density/include/density_sketch.hpp b/density/include/density_sketch.hpp index a7edb93..551d51f 100755 --- a/density/include/density_sketch.hpp +++ b/density/include/density_sketch.hpp @@ -26,6 +26,8 @@ #include <numeric> #include <cmath> +#include "common_defs.hpp" + /* * Based on the following paper: * Zohar Karnin, Edo Liberty "Discrepancy, Coresets, and Sketches in Machine Learning" @@ -114,6 +116,17 @@ public: */ Allocator get_allocator() const; + /** + * Prints a summary of the sketch. + * @param print_levels if true include information about levels + * @param print_items if true include sketch data + */ + string<Allocator> to_string(bool print_levels = false, bool print_items = false) const; + + class const_iterator; + const_iterator begin() const; + const_iterator end() const; + private: uint16_t k_; uint32_t dim_; @@ -125,6 +138,28 @@ private: void compact_level(unsigned height); }; +template<typename T, typename K, typename A> +class density_sketch<T, K, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> { +public: + using Vector = density_sketch<T, K, A>::Vector; + using value_type = std::pair<const Vector&, const uint64_t>; + const_iterator& operator++(); + const_iterator& operator++(int); + bool operator==(const const_iterator& other) const; + bool operator!=(const const_iterator& other) const; + const value_type operator*() const; + const return_value_holder<value_type> operator->() const; +private: + using LevelsIterator = typename density_sketch<T, K, A>::Levels::const_iterator; + using LevelIterator = typename density_sketch<T, K, A>::Level::const_iterator; + LevelsIterator levels_it_; + LevelsIterator levels_end_; + LevelIterator level_it_; + unsigned height_; + friend class density_sketch<T, K, A>; + const_iterator(LevelsIterator begin, LevelsIterator end); +}; + } /* namespace datasketches */ #include "density_sketch_impl.hpp" diff --git a/density/include/density_sketch_impl.hpp b/density/include/density_sketch_impl.hpp index 895a3d6..6771767 100755 --- a/density/include/density_sketch_impl.hpp +++ b/density/include/density_sketch_impl.hpp @@ -21,8 +21,8 @@ #define DENSITY_SKETCH_IMPL_HPP_ #include <algorithm> +#include <sstream> -#include "common_defs.hpp" #include "conditional_forward.hpp" namespace datasketches { @@ -140,6 +140,128 @@ void density_sketch<T, K, A>::compact_level(unsigned height) { level.clear(); } +template<typename T, typename K, typename A> +string<A> density_sketch<T, K, A>::to_string(bool print_levels, bool print_items) const { + // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements. + // The stream does not support passing an allocator instance, and alternatives are complicated. + std::ostringstream os; + os << "### Density sketch summary:" << std::endl; + os << " K : " << k_ << std::endl; + os << " Dim : " << dim_ << std::endl; + os << " Empty : " << (is_empty() ? "true" : "false") << std::endl; + os << " N : " << n_ << std::endl; + os << " Retained items : " << num_retained_ << std::endl; + os << " Levels : " << levels_.size() << std::endl; + os << "### End sketch summary" << std::endl; + + if (print_levels) { + os << "### Density sketch levels:" << std::endl; + os << " height: size" << std::endl; + for (unsigned height = 0; height < levels_.size(); ++height) { + os << " " << height << ": " + << levels_[height].size() << std::endl; + } + os << "### End sketch levels" << std::endl; + } + + if (print_items) { + os << "### Density sketch data:" << std::endl; + unsigned level = 0; + for (unsigned height = 0; height < levels_.size(); ++height) { + os << " level " << height << ": " << std::endl; + for (const auto& point: levels_[height]) { + os << " ["; + bool first = true; + for (auto value: point) { + if (first) { + first = false; + } else { + os << ", "; + } + os << value; + } + os << "]" << std::endl; + } + ++level; + } + os << "### End sketch data" << std::endl; + } + return string<A>(os.str().c_str(), levels_.get_allocator()); +} + +template<typename T, typename K, typename A> +auto density_sketch<T, K, A>::begin() const -> const_iterator { + return const_iterator(levels_.begin(), levels_.end()); +} + +template<typename T, typename K, typename A> +auto density_sketch<T, K, A>::end() const -> const_iterator { + return const_iterator(levels_.end(), levels_.end()); +} + +// iterator + +template<typename T, typename K, typename A> +density_sketch<T, K, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end): +levels_it_(begin), +levels_end_(end), +level_it_(), +height_(0) +{ + // skip empty levels + while (levels_it_ != levels_end_) { + level_it_ = levels_it_->begin(); + if (level_it_ != levels_it_->end()) break; + ++levels_it_; + } +} + +template<typename T, typename K, typename A> +auto density_sketch<T, K, A>::const_iterator::operator++() -> const_iterator& { + ++level_it_; + if (level_it_ == levels_it_->end()) { + ++levels_it_; + ++height_; + // skip empty levels + while (levels_it_ != levels_end_) { + level_it_ = levels_it_->begin(); + if (level_it_ != levels_it_->end()) break; + ++levels_it_; + ++height_; + } + } + return *this; +} + +template<typename T, typename K, typename A> +auto density_sketch<T, K, A>::const_iterator::operator++(int) -> const_iterator& { + const_iterator tmp(*this); + operator++(); + return tmp; +} + +template<typename T, typename K, typename A> +bool density_sketch<T, K, A>::const_iterator::operator==(const const_iterator& other) const { + if (levels_it_ != other.levels_it_) return false; + if (levels_it_ == levels_end_) return true; + return level_it_ == other.level_it_; +} + +template<typename T, typename K, typename A> +bool density_sketch<T, K, A>::const_iterator::operator!=(const const_iterator& other) const { + return !operator==(other); +} + +template<typename T, typename K, typename A> +auto density_sketch<T, K, A>::const_iterator::operator*() const -> const value_type { + return value_type(*level_it_, 1ULL << height_); +} + +template<typename T, typename K, typename A> +auto density_sketch<T, K, A>::const_iterator::operator->() const -> const return_value_holder<value_type> { + return **this; +} + } /* namespace datasketches */ #endif diff --git a/density/test/density_sketch_test.cpp b/density/test/density_sketch_test.cpp index 704d3ac..a51ceb2 100755 --- a/density/test/density_sketch_test.cpp +++ b/density/test/density_sketch_test.cpp @@ -21,8 +21,6 @@ #include <density_sketch.hpp> -#include <iostream> - namespace datasketches { TEST_CASE("density sketch: empty", "[density_sketch]") { @@ -58,4 +56,19 @@ TEST_CASE("density sketch: merge", "[density_sketch]") { REQUIRE(sketch1.get_num_retained() == 3); } +TEST_CASE("density sketch: iterator", "[density_sketch]") { + density_sketch<float> sketch(10, 3); + unsigned n = 1000; + for (unsigned i = 1; i <= n; ++i) sketch.update(std::vector<float>(3, i)); + REQUIRE(sketch.get_n() == n); + //std::cout << sketch.to_string(true, true); + unsigned count = 0; + for (auto pair: sketch) { + ++count; + // just to assert something about the output + REQUIRE(pair.first.size() == sketch.get_dim()); + } + REQUIRE(count == sketch.get_num_retained()); +} + } /* namespace datasketches */ --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
