This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch density_custom_kernel in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit 2a32802a311eb6cdd0d2405c263ce983114c1ff8 Author: AlexanderSaydakov <[email protected]> AuthorDate: Mon Dec 19 22:53:56 2022 -0800 python wrapper --- python/CMakeLists.txt | 2 + python/src/datasketches.cpp | 8 ++++ python/src/density_wrapper.cpp | 86 ++++++++++++++++++++++++++++++++++++++++++ python/tests/density_test.py | 62 ++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 0eff12a..508e173 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -47,6 +47,7 @@ target_link_libraries(python req quantiles count + density pybind11::module ) @@ -78,6 +79,7 @@ target_sources(python src/vo_wrapper.cpp src/req_wrapper.cpp src/quantiles_wrapper.cpp + src/density_wrapper.cpp src/ks_wrapper.cpp src/count_wrapper.cpp src/vector_of_kll.cpp diff --git a/python/src/datasketches.cpp b/python/src/datasketches.cpp index 2186a33..e10074a 100644 --- a/python/src/datasketches.cpp +++ b/python/src/datasketches.cpp @@ -31,7 +31,11 @@ void init_tuple(py::module& m); void init_vo(py::module& m); void init_req(py::module& m); void init_quantiles(py::module& m); +<<<<<<< HEAD void init_count_min(py::module& m); +======= +void init_density(py::module& m); +>>>>>>> c354c56 (python wrapper) void init_vector_of_kll(py::module& m); // supporting objects @@ -48,7 +52,11 @@ PYBIND11_MODULE(_datasketches, m) { init_vo(m); init_req(m); init_quantiles(m); +<<<<<<< HEAD init_count_min(m); +======= + init_density(m); +>>>>>>> c354c56 (python wrapper) init_vector_of_kll(m); init_kolmogorov_smirnov(m); diff --git a/python/src/density_wrapper.cpp b/python/src/density_wrapper.cpp new file mode 100644 index 0000000..6ae5a47 --- /dev/null +++ b/python/src/density_wrapper.cpp @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "density_sketch.hpp" + +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> +#include <pybind11/numpy.h> +#include <vector> + +namespace py = pybind11; + +namespace datasketches { + +namespace python { + +template<typename T> +py::list density_sketch_get_coreset(const density_sketch<T>& sketch) { + py::list list(sketch.get_num_retained()); + unsigned i = 0; + for (auto pair: sketch) { + list[i++] = py::make_tuple(pair.first, pair.second); + } + return list; +} + +} +} + +namespace dspy = datasketches::python; + +template<typename T> +void bind_density_sketch(py::module &m, const char* name) { + using namespace datasketches; + + py::class_<density_sketch<T>>(m, name) + .def(py::init<uint16_t, uint32_t>(), py::arg("k"), py::arg("dim")) + .def("update", static_cast<void (density_sketch<T>::*)(const std::vector<T>&)>(&density_sketch<T>::update), + "Updates the sketch with the given vector") + .def("update", static_cast<void (density_sketch<T>::*)(std::vector<T>&&)>(&density_sketch<T>::update), + "Updates the sketch with the given vector") + .def("merge", static_cast<void (density_sketch<T>::*)(const density_sketch<T>&)>(&density_sketch<T>::merge), py::arg("sketch"), + "Merges the provided sketch into this one") + .def("is_empty", &density_sketch<T>::is_empty, + "Returns True if the sketch is empty, otherwise False") + .def("get_k", &density_sketch<T>::get_k, + "Returns the configured parameter k") + .def("get_dim", &density_sketch<T>::get_dim, + "Returns the configured parameter dim") + .def("get_n", &density_sketch<T>::get_n, + "Returns the length of the input stream") + .def("get_num_retained", &density_sketch<T>::get_num_retained, + "Returns the number of retained items (samples) in the sketch") + .def("is_estimation_mode", &density_sketch<T>::is_estimation_mode, + "Returns True if the sketch is in estimation mode, otherwise False") + .def("get_estimate", &density_sketch<T>::get_estimate, py::arg("point"), + "Returns an approximate density at the given point") + .def("get_coreset", &dspy::density_sketch_get_coreset<T>, + "Returns the retained samples with weights") + .def("__str__", &density_sketch<T>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false, + "Produces a string summary of the sketch") + .def("to_string", &density_sketch<T>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false, + "Produces a string summary of the sketch") + ; +} + +void init_density(py::module &m) { + bind_density_sketch<float>(m, "density_floats_sketch"); + bind_density_sketch<double>(m, "density_doubles_sketch"); +} diff --git a/python/tests/density_test.py b/python/tests/density_test.py new file mode 100644 index 0000000..82f5bad --- /dev/null +++ b/python/tests/density_test.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest +from datasketches import density_doubles_sketch +import numpy as np + +class densityTest(unittest.TestCase): + def test_density_sketch(self): + k = 10 + dim = 3 + n = 1000 + + sketch = density_doubles_sketch(k, dim) + + self.assertEqual(sketch.get_k(), k) + self.assertEqual(sketch.get_dim(), dim) + self.assertTrue(sketch.is_empty()) + self.assertFalse(sketch.is_estimation_mode()) + self.assertEqual(sketch.get_n(), 0) + self.assertEqual(sketch.get_num_retained(), 0) + + for i in range(n): + sketch.update([i, i, i]) + + self.assertFalse(sketch.is_empty()) + self.assertTrue(sketch.is_estimation_mode()) + self.assertEqual(sketch.get_n(), n) + self.assertGreater(sketch.get_num_retained(), k) + self.assertLess(sketch.get_num_retained(), n) + self.assertGreater(sketch.get_estimate([n - 1, n - 1, n - 1]), 0) + + print(sketch.to_string()) + + list = sketch.get_coreset() + self.assertEqual(len(list), sketch.get_num_retained()) + + def test_density_merge(self): + sketch1 = density_doubles_sketch(10, 2) + sketch1.update([0, 0]) + sketch2 = density_doubles_sketch(10, 2) + sketch2.update([0, 1]) + sketch1.merge(sketch2) + self.assertEqual(sketch1.get_n(), 2) + self.assertEqual(sketch1.get_num_retained(), 2) + +if __name__ == '__main__': + unittest.main() --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
