This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch kll_default in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit b441deefc55619ef7a10f1d0173317cc29383cbd Author: AlexanderSaydakov <[email protected]> AuthorDate: Tue Oct 22 11:20:51 2024 -0700 functions with default k --- kll/sqlx/kll_sketch_float_build.sqlx | 87 ++-------------------- ...at_build.sqlx => kll_sketch_float_build_k.sqlx} | 0 kll/sqlx/kll_sketch_float_merge.sqlx | 87 ++-------------------- ...at_merge.sqlx => kll_sketch_float_merge_k.sqlx} | 0 kll/test/kll_sketch_example.sql | 4 +- kll/test/kll_sketch_test.sql | 39 ++++++---- 6 files changed, 36 insertions(+), 181 deletions(-) diff --git a/kll/sqlx/kll_sketch_float_build.sqlx b/kll/sqlx/kll_sketch_float_build.sqlx index a6c4224..0201610 100644 --- a/kll/sqlx/kll_sketch_float_build.sqlx +++ b/kll/sqlx/kll_sketch_float_build.sqlx @@ -19,95 +19,18 @@ config { hasOutput: true } -CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(value FLOAT64, k INT NOT AGGREGATE) +CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(value FLOAT64) RETURNS BYTES -LANGUAGE js OPTIONS ( - library=["${JS_BUCKET}/kll_sketch_float.mjs"], description = '''Creates a sketch that represents the distribution of the given column. Param value: the column of FLOAT64 values. -Param k: the sketch accuracy/size parameter as an INT in the range [8, 65535]. +Defaults: k = 200. Returns: a KLL Sketch, as bytes. For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' -) AS R""" -import ModuleFactory from "${JS_BUCKET}/kll_sketch_float.mjs"; -var Module = await ModuleFactory(); -const default_k = Number(Module.DEFAULT_K); - -// UDAF interface -export function initialState(k) { - try { - var state = { - k: k == null ? 
default_k : Number(k), - }; - state.sketch = new Module.kll_sketch_float(state.k); - return state; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } -} - -export function aggregate(state, value) { - try { - if (state.sketch == null) { // for transition deserialize-aggregate - state.sketch = new Module.kll_sketch_float(state.k); - } - state.sketch.update(value); - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } -} - -export function serialize(state) { - if (state.sketch == null) return state; // for transition deserialize-serialize - try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - sketch.merge(state.serialized); - } - return { - k: state.k, - serialized: state.sketch.serializeAsUint8Array() - }; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } finally { - state.sketch.delete(); - } -} - -export function deserialize(serialized) { - return serialized; -} - -export function merge(state, other_state) { - try { - if (state.sketch == null) { - state.sketch = new Module.kll_sketch_float(state.k); - } - if (state.serialized != null) { - state.sketch.merge(state.serialized); - state.serialized = null; - } - if (other_state.serialized != null) { - state.sketch.merge(other_state.serialized); - other_state.serialized = null; - } - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } -} - -export function finalize(state) { - return serialize(state).serialized; -} -"""; +) AS ( + ${ref("kll_sketch_float_build_k")}(value, NULL) +); diff --git a/kll/sqlx/kll_sketch_float_build.sqlx b/kll/sqlx/kll_sketch_float_build_k.sqlx similarity index 100% copy from kll/sqlx/kll_sketch_float_build.sqlx copy to kll/sqlx/kll_sketch_float_build_k.sqlx diff --git 
a/kll/sqlx/kll_sketch_float_merge.sqlx b/kll/sqlx/kll_sketch_float_merge.sqlx index 7a5588e..9135a6b 100644 --- a/kll/sqlx/kll_sketch_float_merge.sqlx +++ b/kll/sqlx/kll_sketch_float_merge.sqlx @@ -19,95 +19,18 @@ config { hasOutput: true } -CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(sketch BYTES, k INT NOT AGGREGATE) +CREATE OR REPLACE AGGREGATE FUNCTION ${self()}(sketch BYTES) RETURNS BYTES -LANGUAGE js OPTIONS ( - library=["${JS_BUCKET}/kll_sketch_float.mjs"], description = '''Merges sketches from the given column. Param sketch: the column of values. -Param k: the sketch accuracy/size parameter as an integer in the range [8, 65535]. +Defaults: k = 200. Returns: a serialized KLL sketch as BYTES. For more information: - https://datasketches.apache.org/docs/KLL/KLLSketch.html ''' -) AS R""" -import ModuleFactory from "${JS_BUCKET}/kll_sketch_float.mjs"; -var Module = await ModuleFactory(); -const default_k = Number(Module.DEFAULT_K); - -// UDAF interface -export function initialState(k) { - try { - var state = { - k: k == null ? 
default_k : Number(k), - }; - state.sketch = new Module.kll_sketch_float(state.k); - return state; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } -} - -export function aggregate(state, sketch) { - try { - if (state.sketch == null) { // for transition deserialize-aggregate - state.sketch = new Module.kll_sketch_float(state.k); - } - state.sketch.merge(sketch); - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } -} - -export function serialize(state) { - if (state.sketch == null) return state; // for transition deserialize-serialize - try { - // for prior transition deserialize-aggregate - // merge aggregated and serialized state - if (state.sketch != null && state.serialized != null) { - sketch.merge(state.serialized); - } - return { - k: state.k, - serialized: state.sketch.serializeAsUint8Array() - }; - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } finally { - state.sketch.delete(); - } -} - -export function deserialize(serialized) { - return serialized; -} - -export function merge(state, other_state) { - try { - if (state.sketch == null) { - state.sketch = new Module.kll_sketch_float(state.k); - } - if (state.serialized != null) { - state.sketch.merge(state.serialized); - state.serialized = null; - } - if (other_state.serialized != null) { - state.sketch.merge(other_state.serialized); - other_state.serialized = null; - } - } catch (e) { - if (e.message != null) throw e; - throw new Error(Module.getExceptionMessage(e)); - } -} - -export function finalize(state) { - return serialize(state).serialized; -} -"""; +) AS ( + ${ref("kll_sketch_float_merge_k")}(sketch, NULL) +); diff --git a/kll/sqlx/kll_sketch_float_merge.sqlx b/kll/sqlx/kll_sketch_float_merge_k.sqlx similarity index 100% copy from kll/sqlx/kll_sketch_float_merge.sqlx copy to kll/sqlx/kll_sketch_float_merge_k.sqlx diff --git 
a/kll/test/kll_sketch_example.sql b/kll/test/kll_sketch_example.sql index a0cfdd7..9fa28b7 100644 --- a/kll/test/kll_sketch_example.sql +++ b/kll/test/kll_sketch_example.sql @@ -32,7 +32,7 @@ CREATE OR REPLACE TABLE `$BQ_DATASET`.agg_sample_data AS SELECT group_key, count(*) AS total_count, - `$BQ_DATASET`.kll_sketch_float_build(x, 250) AS kll_sketch + `$BQ_DATASET`.kll_sketch_float_build_k(x, 250) AS kll_sketch FROM `$BQ_DATASET`.sample_data GROUP BY group_key; @@ -40,7 +40,7 @@ GROUP BY group_key; WITH agg_data AS ( SELECT - `$BQ_DATASET`.kll_sketch_float_merge(kll_sketch, 250) as merged_kll_sketch, + `$BQ_DATASET`.kll_sketch_float_merge_k(kll_sketch, 250) as merged_kll_sketch, SUM(total_count) as total_count FROM `$BQ_DATASET`.agg_sample_data ) diff --git a/kll/test/kll_sketch_test.sql b/kll/test/kll_sketch_test.sql index fc36aa3..e62b12c 100644 --- a/kll/test/kll_sketch_test.sql +++ b/kll/test/kll_sketch_test.sql @@ -19,51 +19,60 @@ create or replace table `$BQ_DATASET`.kll_sketch(sketch bytes); +# using default insert into `$BQ_DATASET`.kll_sketch -(select `$BQ_DATASET`.kll_sketch_float_build(value, 200) from unnest([1,2,3,4,5,6,7,8,9,10]) as value); +(select `$BQ_DATASET`.kll_sketch_float_build(value) from unnest([1,2,3,4,5,6,7,8,9,10]) as value); + +# using full signature insert into `$BQ_DATASET`.kll_sketch -(select `$BQ_DATASET`.kll_sketch_float_build(value, 200) from unnest([11,12,13,14,15,16,17,18,19,20]) as value); +(select `$BQ_DATASET`.kll_sketch_float_build_k(value, 100) from unnest([11,12,13,14,15,16,17,18,19,20]) as value); select `$BQ_DATASET`.kll_sketch_float_to_string(sketch) from `$BQ_DATASET`.kll_sketch; +# using default +select `$BQ_DATASET`.kll_sketch_float_to_string(`$BQ_DATASET`.kll_sketch_float_merge(sketch)) from `$BQ_DATASET`.kll_sketch; + +# using full signature +select `$BQ_DATASET`.kll_sketch_float_to_string(`$BQ_DATASET`.kll_sketch_float_merge_k(sketch, 100)) from `$BQ_DATASET`.kll_sketch; + # expected 0.5 -select 
`$BQ_DATASET`.kll_sketch_float_get_rank(`$BQ_DATASET`.kll_sketch_float_merge(sketch, null), 10, true) from `$BQ_DATASET`.kll_sketch; +select `$BQ_DATASET`.kll_sketch_float_get_rank(`$BQ_DATASET`.kll_sketch_float_merge(sketch), 10, true) from `$BQ_DATASET`.kll_sketch; # expected 10 -select `$BQ_DATASET`.kll_sketch_float_get_quantile(`$BQ_DATASET`.kll_sketch_float_merge(sketch, null), 0.5, true) from `$BQ_DATASET`.kll_sketch; +select `$BQ_DATASET`.kll_sketch_float_get_quantile(`$BQ_DATASET`.kll_sketch_float_merge(sketch), 0.5, true) from `$BQ_DATASET`.kll_sketch; # expected 20 -select `$BQ_DATASET`.kll_sketch_float_get_n(`$BQ_DATASET`.kll_sketch_float_merge(sketch, null)) from `$BQ_DATASET`.kll_sketch; +select `$BQ_DATASET`.kll_sketch_float_get_n(`$BQ_DATASET`.kll_sketch_float_merge(sketch)) from `$BQ_DATASET`.kll_sketch; # expected 0.5, 0.5 -select `$BQ_DATASET`.kll_sketch_float_get_pmf(`$BQ_DATASET`.kll_sketch_float_merge(sketch, null), [10.0], true) from `$BQ_DATASET`.kll_sketch; +select `$BQ_DATASET`.kll_sketch_float_get_pmf(`$BQ_DATASET`.kll_sketch_float_merge(sketch), [10.0], true) from `$BQ_DATASET`.kll_sketch; # expected 0.5, 1 -select `$BQ_DATASET`.kll_sketch_float_get_cdf(`$BQ_DATASET`.kll_sketch_float_merge(sketch, null), [10.0], true) from `$BQ_DATASET`.kll_sketch; +select `$BQ_DATASET`.kll_sketch_float_get_cdf(`$BQ_DATASET`.kll_sketch_float_merge(sketch), [10.0], true) from `$BQ_DATASET`.kll_sketch; # expected 1 -select `$BQ_DATASET`.kll_sketch_float_get_min_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch, null)) from `$BQ_DATASET`.kll_sketch; +select `$BQ_DATASET`.kll_sketch_float_get_min_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch)) from `$BQ_DATASET`.kll_sketch; # expected 20 -select `$BQ_DATASET`.kll_sketch_float_get_max_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch, null)) from `$BQ_DATASET`.kll_sketch; +select `$BQ_DATASET`.kll_sketch_float_get_max_value(`$BQ_DATASET`.kll_sketch_float_merge(sketch)) from `$BQ_DATASET`.kll_sketch; drop 
table `$BQ_DATASET`.kll_sketch; # expected about 1.3% -select `$BQ_DATASET`.kll_sketch_float_get_normalized_rank_error(`$BQ_DATASET`.kll_sketch_float_build(value, null), false) from unnest(generate_array(1, 10000)) as value; +select `$BQ_DATASET`.kll_sketch_float_get_normalized_rank_error(`$BQ_DATASET`.kll_sketch_float_build(value), false) from unnest(generate_array(1, 10000)) as value; -select `$BQ_DATASET`.kll_sketch_float_get_num_retained(`$BQ_DATASET`.kll_sketch_float_build(value, null)) from unnest(generate_array(1, 10000)) as value; +select `$BQ_DATASET`.kll_sketch_float_get_num_retained(`$BQ_DATASET`.kll_sketch_float_build(value)) from unnest(generate_array(1, 10000)) as value; # expected false select `$BQ_DATASET`.kll_sketch_float_kolmogorov_smirnov( - (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from unnest([1,2,3,4,5,6,7,8,9,10]) as value), - (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from unnest([1,2,3,4,5,6,7,8,9,10]) as value), + (select `$BQ_DATASET`.kll_sketch_float_build(value) from unnest([1,2,3,4,5,6,7,8,9,10]) as value), + (select `$BQ_DATASET`.kll_sketch_float_build(value) from unnest([1,2,3,4,5,6,7,8,9,10]) as value), 0.05 ); # expected true select `$BQ_DATASET`.kll_sketch_float_kolmogorov_smirnov( - (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from unnest([1,2,3,4,5,6,7,8,9,10]) as value), - (select `$BQ_DATASET`.kll_sketch_float_build(value, null) from unnest([11,12,13,14,15,16,17,18,19,20]) as value), + (select `$BQ_DATASET`.kll_sketch_float_build(value) from unnest([1,2,3,4,5,6,7,8,9,10]) as value), + (select `$BQ_DATASET`.kll_sketch_float_build(value) from unnest([11,12,13,14,15,16,17,18,19,20]) as value), 0.05 ); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
