This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch_test
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit a2c77e329d329f06588dc444fc2ecb8f692ea1db
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Thu Nov 7 18:21:52 2024 -0800

    tuple sketch tests
---
 tests/tuple_sketch_int64_test.js | 280 +++++++++++++++++++++++++++++++++++++++
 tuple/tuple_sketch_int64.cpp     |   5 +-
 2 files changed, 284 insertions(+), 1 deletion(-)

diff --git a/tests/tuple_sketch_int64_test.js b/tests/tuple_sketch_int64_test.js
new file mode 100644
index 0000000..7825b5d
--- /dev/null
+++ b/tests/tuple_sketch_int64_test.js
@@ -0,0 +1,280 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+const { generate_udf_test, generate_udaf_test } = require('./unit_test_utils');
+
+// using defaults
+
+const tuple_1 = `FROM_BASE64('AgMJAQAazJMDAAAAAAAAALcMbuWor0AIAQAAAAAAAACFf0C2icflNAEAAAAAAAAAF8EdUoUHAXsBAAAAAAAAAA==')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_string", {
+  input_columns: [`str`, `1`],
+  input_rows: `SELECT * FROM UNNEST(['a', 'b', 'c']) AS str`,
+  expected_output: tuple_1
+});
+
+const tuple_2 = `FROM_BASE64('AgMJAQAazJMDAAAAAAAAALcMbuWor0AIAQAAAAAAAABOPehbCCvBLgEAAAAAAAAA4F817XUdAHMBAAAAAAAAAA==')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_string", {
+  input_columns: [`str`, `1`],
+  input_rows: `SELECT * FROM UNNEST(['c', 'd', 'e']) AS str`,
+  expected_output: tuple_2
+});
+
+const tuple_union_1 = `FROM_BASE64('AgMJAQAazJMFAAAAAAAAALcMbuWor0AIAgAAAAAAAABOPehbCCvBLgEAAAAAAAAAhX9AtonH5TQBAAAAAAAAAOBfNe11HQBzAQAAAAAAAAAXwR1ShQcBewEAAAAAAAAA')`;
+
+generate_udf_test("tuple_sketch_int64_union", [{
+  inputs: [ tuple_1, tuple_2 ],
+  expected_output: tuple_union_1
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_estimate", [{
+  inputs: [ tuple_union_1 ],
+  expected_output: 5
+}]);
+
+generate_udf_test("tuple_sketch_int64_to_string", [{
+  inputs: [ tuple_union_1 ],
+  expected_output: `'''### Tuple sketch summary:
+   num retained entries : 5
+   seed hash            : 37836
+   empty?               : false
+   ordered?             : true
+   estimation mode?     : false
+   theta (fraction)     : 1
+   theta (raw 64-bit)   : 9223372036854775807
+   estimate             : 5
+   lower bound 95% conf : 5
+   upper bound 95% conf : 5
+### End sketch summary
+'''`
+}]);
+
+const tuple_intersection = `FROM_BASE64('AQMJAQAazJO3DG7lqK9ACAIAAAAAAAAA')`;
+
+generate_udf_test("tuple_sketch_int64_intersection", [{
+  inputs: [ tuple_1, tuple_2 ],
+  expected_output: tuple_intersection
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_estimate", [{
+  inputs: [ tuple_intersection ],
+  expected_output: 1
+}]);
+
+const tuple_a_not_b = `FROM_BASE64('AgMJAQAazJMCAAAAAAAAAIV/QLaJx+U0AQAAAAAAAAAXwR1ShQcBewEAAAAAAAAA')`;
+
+generate_udf_test("tuple_sketch_int64_a_not_b", [{
+  inputs: [ tuple_1, tuple_2 ],
+  expected_output: tuple_a_not_b
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_estimate", [{
+  inputs: [ tuple_a_not_b ],
+  expected_output: 2
+}]);
+
+generate_udf_test("tuple_sketch_int64_jaccard_similarity", [{
+  inputs: [ tuple_1, tuple_2 ],
+  expected_output: `STRUCT(0.2 AS lower_bound, 0.2 AS estimate, 0.2 AS upper_bound)`
+}]);
+
+const tuple_3 = `FROM_BASE64('AgMJAQAazJMDAAAAAAAAABX5fcu9hqEFAQAAAAAAAADDl/wSgXCdHgEAAAAAAAAAukCzwdoGaV0BAAAAAAAAAA==')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_int64", {
+  input_columns: [`value`, `1`],
+  input_rows: `SELECT * FROM UNNEST([1, 2, 3]) AS value`,
+  expected_output: tuple_3
+});
+
+const tuple_4 = `FROM_BASE64('AgMJAQAazJMDAAAAAAAAAEDeLuHJ2z0IAQAAAAAAAAC9MnNyRpHMFAEAAAAAAAAAukCzwdoGaV0BAAAAAAAAAA==')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_int64", {
+  input_columns: [`value`, `1`],
+  input_rows: `SELECT * FROM UNNEST([3, 4, 5]) AS value`,
+  expected_output: tuple_4
+});
+
+const tuple_union_2 = `FROM_BASE64('AgMJAQAazJMFAAAAAAAAABX5fcu9hqEFAQAAAAAAAABA3i7hyds9CAEAAAAAAAAAvTJzckaRzBQBAAAAAAAAAMOX/BKBcJ0eAQAAAAAAAAC6QLPB2gZpXQIAAAAAAAAA')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_union", {
+  input_columns: [`sketch`],
+  input_rows: `SELECT * FROM UNNEST([${tuple_3}, ${tuple_4}]) AS sketch`,
+  expected_output: tuple_union_2
+});
+
+generate_udf_test("tuple_sketch_int64_get_estimate_and_bounds", [{
+  inputs: [ tuple_union_2, 3 ],
+  expected_output: `STRUCT(5 AS estimate, 5 AS lower_bound, 5 AS upper_bound)`
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_theta", [{
+  inputs: [ tuple_union_2 ],
+  expected_output: 1
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_num_retained", [{
+  inputs: [ tuple_union_2 ],
+  expected_output: 5
+}]);
+
+generate_udf_test("tuple_sketch_int64_from_theta_sketch", [{
+  inputs: [ `FROM_BASE64('AQQDPwEazJMDEIFfUcrcGW6ylF+DQ0nLOjDZ/9ze6gyQ')`, 1 ],
+  expected_output: tuple_1
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_sum_estimate_and_bounds", [{
+  inputs: [ tuple_union_2, 2 ],
+  expected_output: `STRUCT(6 AS sum_estimate, 6 AS sum_lower_bound, 6 AS sum_upper_bound)`
+}]);
+
+generate_udf_test("tuple_sketch_int64_filter_low_high", [{
+  inputs: [ tuple_union_2, 1, 1 ],
+  expected_output: `FROM_BASE64('AgMJAQAazJMEAAAAAAAAABX5fcu9hqEFAQAAAAAAAABA3i7hyds9CAEAAAAAAAAAvTJzckaRzBQBAAAAAAAAAMOX/BKBcJ0eAQAAAAAAAAA=')`
+}]);
+
+
+// using full signatures
+
+const tuple_8_111_09_min_1 = `FROM_BASE64('AwMJAQAajNMDAAAAAAAAAAAAAAAAMzNzVFGNpRzS1xYBAAAAAAAAACc7TbuPjPQXAQAAAAAAAAD5mKk7pGL+SgEAAAAAAAAA')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_string_lgk_seed_p_mode", {
+  input_columns: [`str`, `1`, 'STRUCT(8 AS lgk, 111 AS seed, 0.9 AS p, "MIN" AS mode) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST(['a', 'b', 'c']) AS str`,
+  expected_output: tuple_8_111_09_min_1
+});
+
+const tuple_8_111_09_min_2 = `FROM_BASE64('AwMJAQAajNMDAAAAAAAAAAAAAAAAMzNzVFGNpRzS1xYBAAAAAAAAAOOWbNjz/z4eAQAAAAAAAACi02OpMeqiYgEAAAAAAAAA')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_string_lgk_seed_p_mode", {
+  input_columns: [`str`, `1`, 'STRUCT(8 AS lgk, 111 AS seed, 0.9 AS p, "MIN" AS mode) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST(['c', 'd', 'e']) AS str`,
+  expected_output: tuple_8_111_09_min_2
+});
+
+const tuple_union_8_111_min = `FROM_BASE64('AwMJAQAajNMFAAAAAAAAAAAAAAAAMzNzVFGNpRzS1xYBAAAAAAAAACc7TbuPjPQXAQAAAAAAAADjlmzY8/8+HgEAAAAAAAAA+ZipO6Ri/koBAAAAAAAAAKLTY6kx6qJiAQAAAAAAAAA=')`;
+
+generate_udf_test("tuple_sketch_int64_union_lgk_seed_mode", [{
+  inputs: [ tuple_8_111_09_min_1, tuple_8_111_09_min_2, 8, 111, `"MIN"` ],
+  expected_output: tuple_union_8_111_min
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_estimate_seed", [{
+  inputs: [ tuple_union_8_111_min, 111 ],
+  expected_output: 5.5555557027275215
+}]);
+
+generate_udf_test("tuple_sketch_int64_to_string_seed", [{
+  inputs: [ tuple_union_8_111_min, 111 ],
+  expected_output: `'''### Tuple sketch summary:
+   num retained entries : 5
+   seed hash            : 54156
+   empty?               : false
+   ordered?             : true
+   estimation mode?     : true
+   theta (fraction)     : 0.9
+   theta (raw 64-bit)   : 8301034613266972672
+   estimate             : 5.55556
+   lower bound 95% conf : 5
+   upper bound 95% conf : 9
+### End sketch summary
+'''`
+}]);
+
+const tuple_intersection_111_min = `FROM_BASE64('AwMJAQAajNMBAAAAAAAAAAAAAAAAMzNzVFGNpRzS1xYBAAAAAAAAAA==')`;
+
+generate_udf_test("tuple_sketch_int64_intersection_seed_mode", [{
+  inputs: [ tuple_8_111_09_min_1, tuple_8_111_09_min_2, 111, `"MIN"` ],
+  expected_output: tuple_intersection_111_min
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_estimate_seed", [{
+  inputs: [ tuple_intersection_111_min, 111 ],
+  expected_output: 1.1111111405455043
+}]);
+
+const tuple_a_not_b_111 = `FROM_BASE64('AwMJAQAajNMCAAAAAAAAAAAAAAAAMzNzJztNu4+M9BcBAAAAAAAAAPmYqTukYv5KAQAAAAAAAAA=')`;
+
+generate_udf_test("tuple_sketch_int64_a_not_b_seed", [{
+  inputs: [ tuple_8_111_09_min_1, tuple_8_111_09_min_2, 111 ],
+  expected_output: tuple_a_not_b_111
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_estimate_seed", [{
+  inputs: [ tuple_a_not_b_111, 111 ],
+  expected_output: 2.2222222810910086
+}]);
+
+generate_udf_test("tuple_sketch_int64_jaccard_similarity_seed", [{
+  inputs: [ tuple_8_111_09_min_1, tuple_8_111_09_min_2, 111 ],
+  expected_output: `STRUCT(0.05868247546115801 AS lower_bound, 0.2 AS estimate, 0.4517325934817119 AS upper_bound)`
+}]);
+
+const tuple_8_111_09_max_3 = `FROM_BASE64('AwMJAQAajNMDAAAAAAAAAAAAAAAAMzNzpIWo8+CEJzgBAAAAAAAAACCibaX/wM1AAQAAAAAAAADExpvpLnp1SAEAAAAAAAAA')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_int64_lgk_seed_p_mode", {
+  input_columns: [`value`, `1`, 'STRUCT(8 AS lgk, 111 AS seed, 0.9 AS p, "MAX" AS mode) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST([1, 2, 3]) AS value`,
+  expected_output: tuple_8_111_09_max_3
+});
+
+const tuple_8_111_09_max_4 = `FROM_BASE64('AwMJAQAajNMDAAAAAAAAAAAAAAAAMzNzTrdXenJE1wACAAAAAAAAANlaaoMLu9UFAgAAAAAAAAAgom2l/8DNQAIAAAAAAAAA')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_int64_lgk_seed_p_mode", {
+  input_columns: [`value`, `2`, 'STRUCT(8 AS lgk, 111 AS seed, 0.9 AS p, "MAX" AS mode) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST([3, 4, 5]) AS value`,
+  expected_output: tuple_8_111_09_max_4
+});
+
+const tuple_union_8_111_09_max = `FROM_BASE64('AwMJAQAajNMFAAAAAAAAAAAAAAAAMzNzTrdXenJE1wACAAAAAAAAANlaaoMLu9UFAgAAAAAAAACkhajz4IQnOAEAAAAAAAAAIKJtpf/AzUACAAAAAAAAAMTGm+kuenVIAQAAAAAAAAA=')`;
+
+generate_udaf_test("tuple_sketch_int64_agg_union_lgk_seed_mode", {
+  input_columns: [`sketch`, 'STRUCT(8 AS lgk, 111 AS seed, "MAX" AS mode) NOT AGGREGATE'],
+  input_rows: `SELECT * FROM UNNEST([${tuple_8_111_09_max_3}, ${tuple_8_111_09_max_4}]) AS sketch`,
+  expected_output: tuple_union_8_111_09_max
+});
+
+generate_udf_test("tuple_sketch_int64_get_estimate_and_bounds_seed", [{
+  inputs: [ tuple_union_8_111_09_max, 3, 111 ],
+  expected_output: `STRUCT(5.5555557027275215 AS estimate, 5 AS lower_bound, 11 AS upper_bound)`
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_theta_seed", [{
+  inputs: [ tuple_union_8_111_09_max, 111 ],
+  expected_output: 0.8999999761581421
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_num_retained_seed", [{
+  inputs: [ tuple_union_8_111_09_max, 111 ],
+  expected_output: 5
+}]);
+
+generate_udf_test("tuple_sketch_int64_from_theta_sketch_seed", [{
+  inputs: [ `FROM_BASE64('AgQDPgEajNMAAAAAADMzcwNbX0hyljVFUBHLpzFb/p08wnWFIBcXdIA=')`, 1, 111 ],
+  expected_output: tuple_8_111_09_min_1
+}]);
+
+generate_udf_test("tuple_sketch_int64_get_sum_estimate_and_bounds_seed", [{
+  inputs: [ tuple_union_8_111_09_max, 2, 111 ],
+  expected_output: `STRUCT(8.888889124364034 AS sum_estimate, 8 AS sum_lower_bound, 14.399999999999999 AS sum_upper_bound)`
+}]);
+
+generate_udf_test("tuple_sketch_int64_filter_low_high_seed", [{
+  inputs: [ tuple_union_8_111_09_max, 1, 1, 111 ],
+  expected_output: `FROM_BASE64('AwMJAQAajNMCAAAAAAAAAAAAAAAAMzNzpIWo8+CEJzgBAAAAAAAAAMTGm+kuenVIAQAAAAAAAAA=')`
+}]);
diff --git a/tuple/tuple_sketch_int64.cpp b/tuple/tuple_sketch_int64.cpp
index 45fd15b..776f60d 100644
--- a/tuple/tuple_sketch_int64.cpp
+++ b/tuple/tuple_sketch_int64.cpp
@@ -36,7 +36,10 @@ class tuple_update_policy {
 public:
   tuple_update_policy(tuple_mode mode): mode_(mode) {}
   Summary create() const {
-    return S();
+    if (mode_ == ONE) return 1;
+    else if (mode_ == MIN) return std::numeric_limits<Summary>::max();
+    else if (mode_ == MAX) return std::numeric_limits<Summary>::min();
+    return 0;
   }
   void update(S& summary, const U& update) const {
     if (mode_ == SUM) summary += update;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to