This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch kll_fix_random in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit eb44f0f91e77b9abc077774b9762436ff51a418c Author: AlexanderSaydakov <[email protected]> AuthorDate: Fri Sep 13 17:56:29 2024 -0700 init random generator, added example --- kll/Makefile | 3 ++- kll/crypto.js | 1 + kll/test/kll_sketch_example.sql | 35 +++++++++++++++++++++++++++++++++++ kll/test/kll_sketch_test.sql | 4 ++-- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/kll/Makefile b/kll/Makefile index de18fd5..254feae 100644 --- a/kll/Makefile +++ b/kll/Makefile @@ -24,7 +24,8 @@ EMCFLAGS=-I../datasketches-cpp/common/include \ -sENVIRONMENT=shell \ -sTOTAL_MEMORY=1024MB \ -O3 \ - --bind + --bind \ + --pre-js crypto.js ARTIFACTS=kll_sketch.mjs kll_sketch.js kll_sketch.wasm diff --git a/kll/crypto.js b/kll/crypto.js new file mode 100644 index 0000000..3f5f65b --- /dev/null +++ b/kll/crypto.js @@ -0,0 +1 @@ +var crypto = { getRandomValues: (array) => { for (var i = 0; i < array.length; i++) array[i] = (Math.random()*256)|0 } }; diff --git a/kll/test/kll_sketch_example.sql b/kll/test/kll_sketch_example.sql new file mode 100644 index 0000000..11623c6 --- /dev/null +++ b/kll/test/kll_sketch_example.sql @@ -0,0 +1,35 @@ +# Creating sample data with 1 million records split into 100 groups of nearly equal size + +CREATE OR REPLACE TABLE $BQ_DATASET.sample_data AS +SELECT + CONCAT("group_key_", CAST(RAND() * 100 AS INT64)) as group_key, + RAND() AS x +FROM + UNNEST(GENERATE_ARRAY(1, 1000000)); + +# Creating KLL merge sketches for a group key + +CREATE OR REPLACE TABLE $BQ_DATASET.agg_sample_data AS +SELECT + group_key, + count(*) AS total_count, + $BQ_DATASET.kll_sketch_float_build(x, 250) AS kll_sketch +FROM $BQ_DATASET.sample_data +GROUP BY group_key; + +# Merge group based sketches into a single sketch and then get approx quantiles + +WITH agg_data AS ( + SELECT + $BQ_DATASET.kll_sketch_float_merge(kll_sketch, 250) as merged_kll_sketch, + SUM(total_count) as total_count + FROM $BQ_DATASET.agg_sample_data +) +SELECT + 
$BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.0, true) AS minimum, + $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.5, true) AS p50, + $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.75, true) AS p75, + $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.95, true) AS p95, + $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 1.0, true) AS maximum, + total_count +FROM agg_data; diff --git a/kll/test/kll_sketch_test.sql b/kll/test/kll_sketch_test.sql index c41b68f..ece9d4f 100644 --- a/kll/test/kll_sketch_test.sql +++ b/kll/test/kll_sketch_test.sql @@ -20,9 +20,9 @@ create or replace table $BQ_DATASET.kll_sketch(sketch bytes); insert into $BQ_DATASET.kll_sketch -(select $BQ_DATASET.kll_sketch_float_build(value, null) from unnest([1,2,3,4,5,6,7,8,9,10]) as value); +(select $BQ_DATASET.kll_sketch_float_build(value, 200) from unnest([1,2,3,4,5,6,7,8,9,10]) as value); insert into $BQ_DATASET.kll_sketch -(select $BQ_DATASET.kll_sketch_float_build(value, null) from unnest([11,12,13,14,15,16,17,18,19,20]) as value); +(select $BQ_DATASET.kll_sketch_float_build(value, 200) from unnest([11,12,13,14,15,16,17,18,19,20]) as value); select $BQ_DATASET.kll_sketch_float_to_string(sketch) from $BQ_DATASET.kll_sketch; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
