This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch density_custom_kernel
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 2a32802a311eb6cdd0d2405c263ce983114c1ff8
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Mon Dec 19 22:53:56 2022 -0800

    python wrapper
---
 python/CMakeLists.txt          |  2 +
 python/src/datasketches.cpp    |  8 ++++
 python/src/density_wrapper.cpp | 86 ++++++++++++++++++++++++++++++++++++++++++
 python/tests/density_test.py   | 62 ++++++++++++++++++++++++++++++
 4 files changed, 158 insertions(+)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 0eff12a..508e173 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -47,6 +47,7 @@ target_link_libraries(python
     req
     quantiles
     count
+    density
     pybind11::module
 )
 
@@ -78,6 +79,7 @@ target_sources(python
     src/vo_wrapper.cpp
     src/req_wrapper.cpp
     src/quantiles_wrapper.cpp
+    src/density_wrapper.cpp
     src/ks_wrapper.cpp
     src/count_wrapper.cpp
     src/vector_of_kll.cpp
diff --git a/python/src/datasketches.cpp b/python/src/datasketches.cpp
index 2186a33..e10074a 100644
--- a/python/src/datasketches.cpp
+++ b/python/src/datasketches.cpp
@@ -31,7 +31,11 @@ void init_tuple(py::module& m);
 void init_vo(py::module& m);
 void init_req(py::module& m);
 void init_quantiles(py::module& m);
+<<<<<<< HEAD
 void init_count_min(py::module& m);
+=======
+void init_density(py::module& m);
+>>>>>>> c354c56 (python wrapper)
 void init_vector_of_kll(py::module& m);
 
 // supporting objects
@@ -48,7 +52,11 @@ PYBIND11_MODULE(_datasketches, m) {
   init_vo(m);
   init_req(m);
   init_quantiles(m);
   init_count_min(m);
+  init_density(m);
   init_vector_of_kll(m);
 
   init_kolmogorov_smirnov(m);
diff --git a/python/src/density_wrapper.cpp b/python/src/density_wrapper.cpp
new file mode 100644
index 0000000..6ae5a47
--- /dev/null
+++ b/python/src/density_wrapper.cpp
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "density_sketch.hpp"
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/numpy.h>
+#include <vector>
+
+namespace py = pybind11;
+
+namespace datasketches {
+
+namespace python {
+
+// Builds the Python-side coreset: one tuple per entry produced by iterating the
+// sketch, made from the entry's `first` and `second` members (exposed to Python
+// as "the retained samples with weights").
+template<typename T>
+py::list density_sketch_get_coreset(const density_sketch<T>& sketch) {
+  py::list coreset;
+  // Appending (instead of assigning by index into a pre-sized list) keeps the
+  // list length equal to the number of entries actually iterated.
+  for (const auto& entry: sketch) {
+    coreset.append(py::make_tuple(entry.first, entry.second));
+  }
+  return coreset;
+}
+
+}
+}
+
+namespace dspy = datasketches::python;
+
+// Registers density_sketch<T> with the Python module `m` as a class called `name`.
+// The Python class is constructed from (k, dim) and exposes update, merge, the
+// basic accessors, get_estimate, get_coreset and string conversion.
+template<typename T>
+void bind_density_sketch(py::module &m, const char* name) {
+  using namespace datasketches;
+  // Local alias so each member-pointer expression below fits on a single line.
+  using sketch_type = density_sketch<T>;
+
+  py::class_<sketch_type>(m, name)
+    .def(py::init<uint16_t, uint32_t>(), py::arg("k"), py::arg("dim"))
+    // The static_casts pick the specific C++ overload that each binding refers to.
+    .def("update", static_cast<void (sketch_type::*)(const std::vector<T>&)>(&sketch_type::update),
+        "Updates the sketch with the given vector")
+    .def("update", static_cast<void (sketch_type::*)(std::vector<T>&&)>(&sketch_type::update),
+        "Updates the sketch with the given vector")
+    .def("merge", static_cast<void (sketch_type::*)(const sketch_type&)>(&sketch_type::merge), py::arg("sketch"),
+        "Merges the provided sketch into this one")
+    .def("is_empty", &sketch_type::is_empty,
+        "Returns True if the sketch is empty, otherwise False")
+    .def("get_k", &sketch_type::get_k,
+        "Returns the configured parameter k")
+    .def("get_dim", &sketch_type::get_dim,
+        "Returns the configured parameter dim")
+    .def("get_n", &sketch_type::get_n,
+        "Returns the length of the input stream")
+    .def("get_num_retained", &sketch_type::get_num_retained,
+        "Returns the number of retained items (samples) in the sketch")
+    .def("is_estimation_mode", &sketch_type::is_estimation_mode,
+        "Returns True if the sketch is in estimation mode, otherwise False")
+    .def("get_estimate", &sketch_type::get_estimate, py::arg("point"),
+        "Returns an approximate density at the given point")
+    // get_coreset is a free helper function; pybind11 passes the sketch as its first argument.
+    .def("get_coreset", &dspy::density_sketch_get_coreset<T>,
+        "Returns the retained samples with weights")
+    // __str__ and to_string share one implementation; both flags default to false.
+    .def("__str__", &sketch_type::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
+        "Produces a string summary of the sketch")
+    .def("to_string", &sketch_type::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
+        "Produces a string summary of the sketch")
+    ;
+}
+
+// Entry point for the density sketch bindings: registers the float and double
+// instantiations under the Python names density_floats_sketch and
+// density_doubles_sketch in module `m`.
+void init_density(py::module &m) {
+  bind_density_sketch<float>(m, "density_floats_sketch");
+  bind_density_sketch<double>(m, "density_doubles_sketch");
+}
diff --git a/python/tests/density_test.py b/python/tests/density_test.py
new file mode 100644
index 0000000..82f5bad
--- /dev/null
+++ b/python/tests/density_test.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import unittest
+from datasketches import density_doubles_sketch
+import numpy as np
+
+class densityTest(unittest.TestCase):
+  def test_density_sketch(self):
+    k = 10
+    dim = 3
+    n = 1000
+
+    sketch = density_doubles_sketch(k, dim)
+
+    self.assertEqual(sketch.get_k(), k)
+    self.assertEqual(sketch.get_dim(), dim)
+    self.assertTrue(sketch.is_empty())
+    self.assertFalse(sketch.is_estimation_mode())
+    self.assertEqual(sketch.get_n(), 0)
+    self.assertEqual(sketch.get_num_retained(), 0)
+
+    for i in range(n):
+      sketch.update([i, i, i])
+
+    self.assertFalse(sketch.is_empty())
+    self.assertTrue(sketch.is_estimation_mode())
+    self.assertEqual(sketch.get_n(), n)
+    self.assertGreater(sketch.get_num_retained(), k)
+    self.assertLess(sketch.get_num_retained(), n)
+    self.assertGreater(sketch.get_estimate([n - 1, n - 1, n - 1]), 0)
+
+    print(sketch.to_string())
+
+    list = sketch.get_coreset()
+    self.assertEqual(len(list), sketch.get_num_retained())
+
+  def test_density_merge(self):
+    sketch1 = density_doubles_sketch(10, 2)
+    sketch1.update([0, 0])
+    sketch2 = density_doubles_sketch(10, 2)
+    sketch2.update([0, 1])
+    sketch1.merge(sketch2)
+    self.assertEqual(sketch1.get_n(), 2)
+    self.assertEqual(sketch1.get_num_retained(), 2)
+
+# Run the test cases when this file is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to