This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch add_trim
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 0918a806f560190d39cffb730b921ff7842e7d43
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Mon Jun 12 12:56:33 2023 -0700

    added trim() and tests
---
 python/datasketches/TupleWrapper.py | 5 ++++-
 python/src/theta_wrapper.cpp        | 2 ++
 python/src/tuple_wrapper.cpp        | 1 +
 python/tests/theta_test.py          | 8 ++++++++
 python/tests/tuple_test.py          | 7 +++++++
 5 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/python/datasketches/TupleWrapper.py b/python/datasketches/TupleWrapper.py
index 3b38da0..97f0820 100644
--- a/python/datasketches/TupleWrapper.py
+++ b/python/datasketches/TupleWrapper.py
@@ -116,11 +116,14 @@ class update_tuple_sketch(tuple_sketch):
     """Returns a compacted form of the sketch, optionally sorting it."""
     return self._gadget.compact(ordered)
 
+  def trim(self):
+    """Removes retained entries in excess of the nominal size k (if any)."""
+    self._gadget.trim()
+
   def reset(self):
     """Resets the sketch to the initial empty state."""
     self._gadget.reset()
 
-
 class tuple_union:
   """An object that can merge Tuple Sketches. Requires a Policy object to handle merging Summaries."""
   _policy: TuplePolicy
diff --git a/python/src/theta_wrapper.cpp b/python/src/theta_wrapper.cpp
index f242ce5..033e6ca 100644
--- a/python/src/theta_wrapper.cpp
+++ b/python/src/theta_wrapper.cpp
@@ -76,6 +76,8 @@ void init_theta(py::module &m) {
          "Updates the sketch with the given string")
     .def("compact", &update_theta_sketch::compact, py::arg("ordered")=true,
          "Returns a compacted form of the sketch, optionally sorting it")
+    .def("trim", &update_theta_sketch::trim, "Removes retained entries in excess of the nominal size k (if any)")
+    .def("reset", &update_theta_sketch::reset, "Resets the sketch to the initial empty state")
   ;
 
   py::class_<compact_theta_sketch, theta_sketch>(m, "compact_theta_sketch")
diff --git a/python/src/tuple_wrapper.cpp b/python/src/tuple_wrapper.cpp
index 706621c..343181d 100644
--- a/python/src/tuple_wrapper.cpp
+++ b/python/src/tuple_wrapper.cpp
@@ -137,6 +137,7 @@ void init_tuple(py::module &m) {
          "Updates the sketch with the given string item and summary value")
     .def("compact", &py_update_tuple::compact, py::arg("ordered")=true,
          "Returns a compacted form of the sketch, optionally sorting it")
+    .def("trim", &py_update_tuple::trim, "Removes retained entries in excess of the nominal size k (if any)")
     .def("reset", &py_update_tuple::reset, "Resets the sketch to the initial empty state")
   ;
 
diff --git a/python/tests/theta_test.py b/python/tests/theta_test.py
index b3ca2da..f2798c4 100644
--- a/python/tests/theta_test.py
+++ b/python/tests/theta_test.py
@@ -54,6 +54,14 @@ class ThetaTest(unittest.TestCase):
           count = count + 1
         self.assertEqual(count, new_sk.get_num_retained())
 
+        num = sk.get_num_retained()
+        sk.trim()
+        self.assertLessEqual(sk.get_num_retained(), num)
+
+        sk.reset()
+        self.assertTrue(sk.is_empty())
+        self.assertEqual(sk.get_num_retained(), 0)
+
     def test_theta_set_operations(self):
         lgk = 12    # 2^k = 4096 rows in the table
         n = 1 << 18 # ~256k unique values
diff --git a/python/tests/tuple_test.py b/python/tests/tuple_test.py
index 2a298ef..6327599 100644
--- a/python/tests/tuple_test.py
+++ b/python/tests/tuple_test.py
@@ -75,6 +75,13 @@ class TupleTest(unittest.TestCase):
           cumSum += pair[1]
         self.assertEqual(cumSum, 5 * cts.get_num_retained())
 
+        num = sk.get_num_retained()
+        sk.trim()
+        self.assertLessEqual(sk.get_num_retained(), num)
+
+        sk.reset()
+        self.assertTrue(sk.is_empty())
+        self.assertEqual(sk.get_num_retained(), 0)
 
     def test_tuple_set_operations(self):
         lgk = 12    # 2^k = 4096 rows in the table


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to