This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch kll_default
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit b441deefc55619ef7a10f1d0173317cc29383cbd
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Oct 22 11:20:51 2024 -0700

    functions with default k
---
 kll/sqlx/kll_sketch_float_build.sqlx               | 87 ++--------------------
 ...at_build.sqlx => kll_sketch_float_build_k.sqlx} |  0
 kll/sqlx/kll_sketch_float_merge.sqlx               | 87 ++--------------------
 ...at_merge.sqlx => kll_sketch_float_merge_k.sqlx} |  0
 kll/test/kll_sketch_example.sql                    |  4 +-
 kll/test/kll_sketch_test.sql                       | 39 ++++++----
 6 files changed, 36 insertions(+), 181 deletions(-)

diff --git a/kll/sqlx/kll_sketch_float_build.sqlx 
b/kll/sqlx/kll_sketch_float_build.sqlx
index a6c4224..0201610 100644
--- a/kll/sqlx/kll_sketch_float_build.sqlx
+++ b/kll/sqlx/kll_sketch_float_build.sqlx
@@ -19,95 +19,18 @@
 
 config { hasOutput: true }
 
-CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(value FLOAT64, k INT NOT 
AGGREGATE)
+CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(value FLOAT64)
 RETURNS BYTES
-LANGUAGE js
 OPTIONS (
-  library=["${JS_BUCKET}/kll_sketch_float.mjs"],
   description = '''Creates a sketch that represents the distribution of the 
given column.
 
 Param value: the column of FLOAT64 values.
-Param k: the sketch accuracy/size parameter as an INT in the range [8, 65535].
+Defaults: k = 200.
 Returns: a KLL Sketch, as bytes.
 
 For more information:
  - https://datasketches.apache.org/docs/KLL/KLLSketch.html
 '''
-) AS R"""
-import ModuleFactory from "${JS_BUCKET}/kll_sketch_float.mjs";
-var Module = await ModuleFactory();
-const default_k = Number(Module.DEFAULT_K);
-
-// UDAF interface
-export function initialState(k) {
-  try {
-    var state = {
-      k: k == null ? default_k : Number(k),
-    };
-    state.sketch = new Module.kll_sketch_float(state.k);
-    return state;
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  }
-}
-
-export function aggregate(state, value) {
-  try {
-    if (state.sketch == null) { // for transition deserialize-aggregate
-      state.sketch = new Module.kll_sketch_float(state.k);
-    }
-    state.sketch.update(value);
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  }
-}
-
-export function serialize(state) {
-  if (state.sketch == null) return state; // for transition 
deserialize-serialize
-  try {
-    // for prior transition deserialize-aggregate
-    // merge aggregated and serialized state
-    if (state.sketch != null && state.serialized != null) {
-      sketch.merge(state.serialized);
-    }
-    return {
-      k: state.k,
-      serialized: state.sketch.serializeAsUint8Array()
-    };
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  } finally {
-    state.sketch.delete();
-  }
-}
-
-export function deserialize(serialized) {
-  return serialized;
-}
-
-export function merge(state, other_state) {
-  try {
-    if (state.sketch == null) {
-      state.sketch = new Module.kll_sketch_float(state.k);
-    }
-    if (state.serialized != null) {
-      state.sketch.merge(state.serialized);
-      state.serialized = null;
-    }
-    if (other_state.serialized != null) {
-      state.sketch.merge(other_state.serialized);
-      other_state.serialized = null;
-    }
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  }
-}
-
-export function finalize(state) {
-  return serialize(state).serialized;
-}
-""";
+) AS (
+  ${ref("kll_sketch_float_build_k")}(value, NULL)
+);
diff --git a/kll/sqlx/kll_sketch_float_build.sqlx 
b/kll/sqlx/kll_sketch_float_build_k.sqlx
similarity index 100%
copy from kll/sqlx/kll_sketch_float_build.sqlx
copy to kll/sqlx/kll_sketch_float_build_k.sqlx
diff --git a/kll/sqlx/kll_sketch_float_merge.sqlx 
b/kll/sqlx/kll_sketch_float_merge.sqlx
index 7a5588e..9135a6b 100644
--- a/kll/sqlx/kll_sketch_float_merge.sqlx
+++ b/kll/sqlx/kll_sketch_float_merge.sqlx
@@ -19,95 +19,18 @@
 
 config { hasOutput: true }
 
-CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(sketch BYTES, k INT NOT 
AGGREGATE)
+CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(sketch BYTES)
 RETURNS BYTES
-LANGUAGE js
 OPTIONS (
-  library=["${JS_BUCKET}/kll_sketch_float.mjs"],
   description = '''Merges sketches from the given column.
 
 Param sketch: the column of values.
-Param k: the sketch accuracy/size parameter as an integer in the range [8, 
65535].
+Defaults: k = 200.
 Returns: a serialized KLL sketch as BYTES.
 
 For more information:
  - https://datasketches.apache.org/docs/KLL/KLLSketch.html
 '''
-) AS R"""
-import ModuleFactory from "${JS_BUCKET}/kll_sketch_float.mjs";
-var Module = await ModuleFactory();
-const default_k = Number(Module.DEFAULT_K);
-
-// UDAF interface
-export function initialState(k) {
-  try {
-    var state = {
-      k: k == null ? default_k : Number(k),
-    };
-    state.sketch = new Module.kll_sketch_float(state.k);
-    return state;
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  }
-}
-
-export function aggregate(state, sketch) {
-  try {
-    if (state.sketch == null) { // for transition deserialize-aggregate
-      state.sketch = new Module.kll_sketch_float(state.k);
-    }
-    state.sketch.merge(sketch);
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  }
-}
-
-export function serialize(state) {
-  if (state.sketch == null) return state; // for transition 
deserialize-serialize
-  try {
-    // for prior transition deserialize-aggregate
-    // merge aggregated and serialized state
-    if (state.sketch != null && state.serialized != null) {
-      sketch.merge(state.serialized);
-    }
-    return {
-      k: state.k,
-      serialized: state.sketch.serializeAsUint8Array()
-    };
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  } finally {
-    state.sketch.delete();
-  }
-}
-
-export function deserialize(serialized) {
-  return serialized;
-}
-
-export function merge(state, other_state) {
-  try {
-    if (state.sketch == null) {
-      state.sketch = new Module.kll_sketch_float(state.k);
-    }
-    if (state.serialized != null) {
-      state.sketch.merge(state.serialized);
-      state.serialized = null;
-    }
-    if (other_state.serialized != null) {
-      state.sketch.merge(other_state.serialized);
-      other_state.serialized = null;
-    }
-  } catch (e) {
-    if (e.message != null) throw e;
-    throw new Error(Module.getExceptionMessage(e));
-  }
-}
-
-export function finalize(state) {
-  return serialize(state).serialized;
-}
-""";
+) AS (
+  ${ref("kll_sketch_float_merge_k")}(sketch, NULL)
+);
diff --git a/kll/sqlx/kll_sketch_float_merge.sqlx 
b/kll/sqlx/kll_sketch_float_merge_k.sqlx
similarity index 100%
copy from kll/sqlx/kll_sketch_float_merge.sqlx
copy to kll/sqlx/kll_sketch_float_merge_k.sqlx
diff --git a/kll/test/kll_sketch_example.sql b/kll/test/kll_sketch_example.sql
index a0cfdd7..9fa28b7 100644
--- a/kll/test/kll_sketch_example.sql
+++ b/kll/test/kll_sketch_example.sql
@@ -32,7 +32,7 @@ CREATE OR REPLACE TABLE `$BQ_DATASET`.agg_sample_data AS
 SELECT
   group_key,
   count(*) AS total_count,
-  `$BQ_DATASET`.kll_sketch_float_build(x, 250) AS kll_sketch
+  `$BQ_DATASET`.kll_sketch_float_build_k(x, 250) AS kll_sketch
 FROM `$BQ_DATASET`.sample_data
 GROUP BY group_key;
 
@@ -40,7 +40,7 @@ GROUP BY group_key;
 
 WITH agg_data AS (
   SELECT
-    `$BQ_DATASET`.kll_sketch_float_merge(kll_sketch, 250) as merged_kll_sketch,
+    `$BQ_DATASET`.kll_sketch_float_merge_k(kll_sketch, 250) as 
merged_kll_sketch,
     SUM(total_count) as total_count
   FROM `$BQ_DATASET`.agg_sample_data
 )
diff --git a/kll/test/kll_sketch_test.sql b/kll/test/kll_sketch_test.sql
index fc36aa3..e62b12c 100644
--- a/kll/test/kll_sketch_test.sql
+++ b/kll/test/kll_sketch_test.sql
@@ -19,51 +19,60 @@
 
 create or replace table `$BQ_DATASET`.kll_sketch(sketch bytes);
 
+# using default
 insert into `$BQ_DATASET`.kll_sketch
-(select `$BQ_DATASET`.kll_sketch_float_build(value, 200) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value);
+(select `$BQ_DATASET`.kll_sketch_float_build(value) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value);
+
+# using full signature
 insert into `$BQ_DATASET`.kll_sketch
-(select `$BQ_DATASET`.kll_sketch_float_build(value, 200) from 
unnest([11,12,13,14,15,16,17,18,19,20]) as value);
+(select `$BQ_DATASET`.kll_sketch_float_build_k(value, 100) from 
unnest([11,12,13,14,15,16,17,18,19,20]) as value);
 
 select `$BQ_DATASET`.kll_sketch_float_to_string(sketch) from 
`$BQ_DATASET`.kll_sketch;
 
+# using default
+select 
`$BQ_DATASET`.kll_sketch_float_to_string(`$BQ_DATASET`.kll_sketch_float_merge(sketch))
 from `$BQ_DATASET`.kll_sketch;
+
+# using full signature
+select 
`$BQ_DATASET`.kll_sketch_float_to_string(`$BQ_DATASET`.kll_sketch_float_merge_k(sketch,
 100)) from `$BQ_DATASET`.kll_sketch;
+
 # expected 0.5
-select 
`$BQ_DATASET`.kll_sketch_float_get_rank(`$BQ_DATASET`.kll_sketch_float_merge(sketch,
 null), 10, true) from `$BQ_DATASET`.kll_sketch;
+select 
`$BQ_DATASET`.kll_sketch_float_get_rank(`$BQ_DATASET`.kll_sketch_float_merge(sketch),
 10, true) from `$BQ_DATASET`.kll_sketch;
 
 # expected 10
-select 
`$BQ_DATASET`.kll_sketch_float_get_quantile(`$BQ_DATASET`.kll_sketch_float_merge(sketch,
 null), 0.5, true) from `$BQ_DATASET`.kll_sketch;
+select 
`$BQ_DATASET`.kll_sketch_float_get_quantile(`$BQ_DATASET`.kll_sketch_float_merge(sketch),
 0.5, true) from `$BQ_DATASET`.kll_sketch;
 
 # expected 20
-select 
`$BQ_DATASET`.kll_sketch_float_get_n(`$BQ_DATASET`.kll_sketch_float_merge(sketch,
 null)) from `$BQ_DATASET`.kll_sketch;
+select 
`$BQ_DATASET`.kll_sketch_float_get_n(`$BQ_DATASET`.kll_sketch_float_merge(sketch))
 from `$BQ_DATASET`.kll_sketch;
 
 # expected 0.5, 0.5
-select 
`$BQ_DATASET`.kll_sketch_float_get_pmf(`$BQ_DATASET`.kll_sketch_float_merge(sketch,
 null), [10.0], true) from `$BQ_DATASET`.kll_sketch;
+select 
`$BQ_DATASET`.kll_sketch_float_get_pmf(`$BQ_DATASET`.kll_sketch_float_merge(sketch),
 [10.0], true) from `$BQ_DATASET`.kll_sketch;
 
 # expected 0.5, 1
-select 
`$BQ_DATASET`.kll_sketch_float_get_cdf(`$BQ_DATASET`.kll_sketch_float_merge(sketch,
 null), [10.0], true) from `$BQ_DATASET`.kll_sketch;
+select 
`$BQ_DATASET`.kll_sketch_float_get_cdf(`$BQ_DATASET`.kll_sketch_float_merge(sketch),
 [10.0], true) from `$BQ_DATASET`.kll_sketch;
 
 # expected 1
-select 
`$BQ_DATASET`.kll_sketch_float_get_min_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch,
 null)) from `$BQ_DATASET`.kll_sketch;
+select 
`$BQ_DATASET`.kll_sketch_float_get_min_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch))
 from `$BQ_DATASET`.kll_sketch;
 
 # expected 20
-select 
`$BQ_DATASET`.kll_sketch_float_get_max_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch,
 null)) from `$BQ_DATASET`.kll_sketch;
+select 
`$BQ_DATASET`.kll_sketch_float_get_max_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch))
 from `$BQ_DATASET`.kll_sketch;
 
 drop table `$BQ_DATASET`.kll_sketch;
 
 # expected about 1.3%
-select 
`$BQ_DATASET`.kll_sketch_float_get_normalized_rank_error(`$BQ_DATASET`.kll_sketch_float_build(value,
 null), false) from unnest(generate_array(1, 10000)) as value;
+select 
`$BQ_DATASET`.kll_sketch_float_get_normalized_rank_error(`$BQ_DATASET`.kll_sketch_float_build(value),
 false) from unnest(generate_array(1, 10000)) as value;
 
-select 
`$BQ_DATASET`.kll_sketch_float_get_num_retained(`$BQ_DATASET`.kll_sketch_float_build(value,
 null)) from unnest(generate_array(1, 10000)) as value;
+select 
`$BQ_DATASET`.kll_sketch_float_get_num_retained(`$BQ_DATASET`.kll_sketch_float_build(value))
 from unnest(generate_array(1, 10000)) as value;
 
 # expected false
 select `$BQ_DATASET`.kll_sketch_float_kolmogorov_smirnov(
-  (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value),
-  (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value),
+  (select `$BQ_DATASET`.kll_sketch_float_build(value) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value),
+  (select `$BQ_DATASET`.kll_sketch_float_build(value) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value),
   0.05
 );
 
 # expected true
 select `$BQ_DATASET`.kll_sketch_float_kolmogorov_smirnov(
-  (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value),
-  (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from 
unnest([11,12,13,14,15,16,17,18,19,20]) as value),
+  (select `$BQ_DATASET`.kll_sketch_float_build(value) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value),
+  (select `$BQ_DATASET`.kll_sketch_float_build(value) from 
unnest([11,12,13,14,15,16,17,18,19,20]) as value),
   0.05
 );


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to