This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch python
in repository https://gitbox.apache.org/repos/asf/datasketches-spark.git
The following commit(s) were added to refs/heads/python by this push:
new 50794a0 improve kll merge test
50794a0 is described below
commit 50794a0ac7f54d89a8af27d1c96e6ac92bc9642b
Author: Jon Malkin <[email protected]>
AuthorDate: Fri Feb 14 09:47:31 2025 -0800
improve kll merge test
---
python/tests/kll_test.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/python/tests/kll_test.py b/python/tests/kll_test.py
index 1c3cf38..a37c2f3 100644
--- a/python/tests/kll_test.py
+++ b/python/tests/kll_test.py
@@ -57,12 +57,17 @@ def test_kll_merge(spark):
df_agg = df.groupBy("id").agg(kll_sketch_double_agg_build("value", k).alias("sketch"))
assert(df_agg.count() == 2)
+ # merge and get a few attributes to check
result = df_agg.select(
kll_sketch_double_agg_merge("sketch").alias("sketch")
+ ).select(
+ "sketch",
+ kll_sketch_double_get_min("sketch").alias("min"),
+ kll_sketch_double_get_max("sketch").alias("max")
).first()
sk = result["sketch"]
assert(sk.n == 2 * n)
assert(sk.k == k)
- assert(sk.get_min_value() == 1.0)
- assert(sk.get_max_value() == 2 * n)
+ assert(sk.get_min_value() == result["min"])
+ assert(sk.get_max_value() == result["max"])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]