This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch density_sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
The following commit(s) were added to refs/heads/density_sketch by this push:
new c354c56 python wrapper
c354c56 is described below
commit c354c56b14d75e55e1c8a21da53ab164141ebbaa
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Mon Dec 19 22:53:56 2022 -0800
python wrapper
---
python/CMakeLists.txt | 2 +
python/src/datasketches.cpp | 2 +
python/src/density_wrapper.cpp | 86 ++++++++++++++++++++++++++++++++++++++++++
python/tests/density_test.py | 62 ++++++++++++++++++++++++++++++
4 files changed, 152 insertions(+)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index bc85092..66132a4 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,6 +45,7 @@ target_link_libraries(python
sampling
req
quantiles
+ density
pybind11::module
)
@@ -75,6 +76,7 @@ target_sources(python
src/vo_wrapper.cpp
src/req_wrapper.cpp
src/quantiles_wrapper.cpp
+ src/density_wrapper.cpp
src/ks_wrapper.cpp
src/vector_of_kll.cpp
src/py_serde.cpp
diff --git a/python/src/datasketches.cpp b/python/src/datasketches.cpp
index b34eea7..5a6ec1c 100644
--- a/python/src/datasketches.cpp
+++ b/python/src/datasketches.cpp
@@ -30,6 +30,7 @@ void init_theta(py::module& m);
void init_vo(py::module& m);
void init_req(py::module& m);
void init_quantiles(py::module& m);
+void init_density(py::module& m);
void init_vector_of_kll(py::module& m);
// supporting objects
@@ -45,6 +46,7 @@ PYBIND11_MODULE(_datasketches, m) {
init_vo(m);
init_req(m);
init_quantiles(m);
+ init_density(m);
init_vector_of_kll(m);
init_kolmogorov_smirnov(m);
diff --git a/python/src/density_wrapper.cpp b/python/src/density_wrapper.cpp
new file mode 100644
index 0000000..6ae5a47
--- /dev/null
+++ b/python/src/density_wrapper.cpp
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "density_sketch.hpp"
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/numpy.h>
+#include <vector>
+
+namespace py = pybind11;
+
+namespace datasketches {
+
+namespace python {
+
+// Copies what the sketch iterates over into a Python list of (first, second) tuples; the binding documents these as retained samples with weights.
+template<typename T>
+py::list density_sketch_get_coreset(const density_sketch<T>& sketch) {
+  py::list coreset(sketch.get_num_retained());  // pre-sized, so tuples are assigned by index rather than appended
+  unsigned slot = 0;
+  for (const auto& entry: sketch)
+    coreset[slot++] = py::make_tuple(entry.first, entry.second);
+  return coreset;
+}
+
+}
+}
+
+namespace dspy = datasketches::python;
+
+// Registers density_sketch<T> on module m as a Python class called `name`.
+template<typename T>
+void bind_density_sketch(py::module &m, const char* name) {
+  using sketch_type = datasketches::density_sketch<T>;
+  using point_type = std::vector<T>;
+  // update() and merge() are each cast to one exact member signature so a specific overload is bound
+  using update_by_ref = void (sketch_type::*)(const point_type&);
+  using update_by_move = void (sketch_type::*)(point_type&&);
+  using merge_by_ref = void (sketch_type::*)(const sketch_type&);
+
+  py::class_<sketch_type> binding(m, name);
+  binding.def(py::init<uint16_t, uint32_t>(), py::arg("k"), py::arg("dim"));
+  binding.def("update", static_cast<update_by_ref>(&sketch_type::update),
+    "Updates the sketch with the given vector");
+  binding.def("update", static_cast<update_by_move>(&sketch_type::update),
+    "Updates the sketch with the given vector");
+  binding.def("merge", static_cast<merge_by_ref>(&sketch_type::merge), py::arg("sketch"),
+    "Merges the provided sketch into this one");
+  binding.def("is_empty", &sketch_type::is_empty, "Returns True if the sketch is empty, otherwise False");
+  binding.def("get_k", &sketch_type::get_k, "Returns the configured parameter k");
+  binding.def("get_dim", &sketch_type::get_dim, "Returns the configured parameter dim");
+  binding.def("get_n", &sketch_type::get_n, "Returns the length of the input stream");
+  binding.def("get_num_retained", &sketch_type::get_num_retained, "Returns the number of retained items (samples) in the sketch");
+  binding.def("is_estimation_mode", &sketch_type::is_estimation_mode,
+    "Returns True if the sketch is in estimation mode, otherwise False");
+  binding.def("get_estimate", &sketch_type::get_estimate, py::arg("point"),
+    "Returns an approximate density at the given point");
+  binding.def("get_coreset", &dspy::density_sketch_get_coreset<T>, "Returns the retained samples with weights");
+  // the same summary is exposed twice: implicitly through str() and explicitly as to_string()
+  binding.def("__str__", &sketch_type::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
+    "Produces a string summary of the sketch");
+  binding.def("to_string", &sketch_type::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
+    "Produces a string summary of the sketch");
+}
+
+void init_density(py::module &mod) {  // invoked as init_density(m) from PYBIND11_MODULE in datasketches.cpp
+  bind_density_sketch<float>(mod, "density_floats_sketch");  // float-valued variant
+  bind_density_sketch<double>(mod, "density_doubles_sketch");  // double-valued variant
+}
diff --git a/python/tests/density_test.py b/python/tests/density_test.py
new file mode 100644
index 0000000..82f5bad
--- /dev/null
+++ b/python/tests/density_test.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+from datasketches import density_doubles_sketch
+import numpy as np
+
+class densityTest(unittest.TestCase):
+ def test_density_sketch(self):
+ k = 10
+ dim = 3
+ n = 1000
+
+ sketch = density_doubles_sketch(k, dim)
+
+ self.assertEqual(sketch.get_k(), k)
+ self.assertEqual(sketch.get_dim(), dim)
+ self.assertTrue(sketch.is_empty())
+ self.assertFalse(sketch.is_estimation_mode())
+ self.assertEqual(sketch.get_n(), 0)
+ self.assertEqual(sketch.get_num_retained(), 0)
+
+ for i in range(n):
+ sketch.update([i, i, i])
+
+ self.assertFalse(sketch.is_empty())
+ self.assertTrue(sketch.is_estimation_mode())
+ self.assertEqual(sketch.get_n(), n)
+ self.assertGreater(sketch.get_num_retained(), k)
+ self.assertLess(sketch.get_num_retained(), n)
+ self.assertGreater(sketch.get_estimate([n - 1, n - 1, n - 1]), 0)
+
+ print(sketch.to_string())
+
+ list = sketch.get_coreset()
+ self.assertEqual(len(list), sketch.get_num_retained())
+
+ def test_density_merge(self):
+ sketch1 = density_doubles_sketch(10, 2)
+ sketch1.update([0, 0])
+ sketch2 = density_doubles_sketch(10, 2)
+ sketch2.update([0, 1])
+ sketch1.merge(sketch2)
+ self.assertEqual(sketch1.get_n(), 2)
+ self.assertEqual(sketch1.get_num_retained(), 2)
+
+if __name__ == '__main__':  # true only when this file is executed directly, not when it is imported
+ unittest.main()  # runs the TestCase classes defined in this module
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]