This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch kll_fix_random
in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git

commit eb44f0f91e77b9abc077774b9762436ff51a418c
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Fri Sep 13 17:56:29 2024 -0700

    init random generator, added example
---
 kll/Makefile                    |  3 ++-
 kll/crypto.js                   |  1 +
 kll/test/kll_sketch_example.sql | 35 +++++++++++++++++++++++++++++++++++
 kll/test/kll_sketch_test.sql    |  4 ++--
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/kll/Makefile b/kll/Makefile
index de18fd5..254feae 100644
--- a/kll/Makefile
+++ b/kll/Makefile
@@ -24,7 +24,8 @@ EMCFLAGS=-I../datasketches-cpp/common/include \
        -sENVIRONMENT=shell \
        -sTOTAL_MEMORY=1024MB \
        -O3 \
-       --bind
+       --bind \
+       --pre-js crypto.js
 
 ARTIFACTS=kll_sketch.mjs kll_sketch.js kll_sketch.wasm
 
diff --git a/kll/crypto.js b/kll/crypto.js
new file mode 100644
index 0000000..3f5f65b
--- /dev/null
+++ b/kll/crypto.js
@@ -0,0 +1 @@
+/* Math.random-based stand-in for crypto.getRandomValues (not cryptographically secure): sets each element to an integer in 0..255 and returns the same array, as the Web Crypto API does. */ var crypto = { getRandomValues: (array) => { for (var i = 0; i < array.length; i++) array[i] = (Math.random()*256)|0; return array; } };
diff --git a/kll/test/kll_sketch_example.sql b/kll/test/kll_sketch_example.sql
new file mode 100644
index 0000000..11623c6
--- /dev/null
+++ b/kll/test/kll_sketch_example.sql
@@ -0,0 +1,35 @@
+# Creating sample data with 1 million records split into 100 groups of nearly equal size (FLOOR keeps keys in 0..99; a bare CAST to INT64 rounds and would yield 101 groups, two of them half-sized)
+
+CREATE OR REPLACE TABLE $BQ_DATASET.sample_data AS
+SELECT
+  CONCAT("group_key_", CAST(FLOOR(RAND() * 100) AS INT64)) AS group_key,
+  RAND() AS x
+FROM
+  UNNEST(GENERATE_ARRAY(1, 1000000));
+
+# Build one KLL float sketch of x per group_key, stored with the group's row count (250 is the second argument of kll_sketch_float_build; presumably the sketch's k parameter - confirm against the function definition)
+
+CREATE OR REPLACE TABLE $BQ_DATASET.agg_sample_data AS
+SELECT
+  group_key,
+  count(*) AS total_count,
+  $BQ_DATASET.kll_sketch_float_build(x, 250) AS kll_sketch
+FROM $BQ_DATASET.sample_data
+GROUP BY group_key;
+
+# Merge the per-group sketches into a single sketch, then read approximate quantiles (ranks 0.0, 0.5, 0.75, 0.95 and 1.0) from it
+
+WITH agg_data AS (
+  SELECT
+    $BQ_DATASET.kll_sketch_float_merge(kll_sketch, 250) AS merged_kll_sketch,
+    SUM(total_count) AS total_count
+  FROM $BQ_DATASET.agg_sample_data
+)
+SELECT
+  $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.0, true) AS minimum,
+  $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.5, true) AS p50,
+  $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.75, true) AS p75,
+  $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 0.95, true) AS p95,
+  $BQ_DATASET.kll_sketch_float_get_quantile(merged_kll_sketch, 1.0, true) AS maximum,
+  total_count
+FROM agg_data;
diff --git a/kll/test/kll_sketch_test.sql b/kll/test/kll_sketch_test.sql
index c41b68f..ece9d4f 100644
--- a/kll/test/kll_sketch_test.sql
+++ b/kll/test/kll_sketch_test.sql
@@ -20,9 +20,9 @@
 create or replace table $BQ_DATASET.kll_sketch(sketch bytes);
 
 insert into $BQ_DATASET.kll_sketch
-(select $BQ_DATASET.kll_sketch_float_build(value, null) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value);
+(select $BQ_DATASET.kll_sketch_float_build(value, 200) from 
unnest([1,2,3,4,5,6,7,8,9,10]) as value);
 insert into $BQ_DATASET.kll_sketch
-(select $BQ_DATASET.kll_sketch_float_build(value, null) from 
unnest([11,12,13,14,15,16,17,18,19,20]) as value);
+(select $BQ_DATASET.kll_sketch_float_build(value, 200) from 
unnest([11,12,13,14,15,16,17,18,19,20]) as value);
 
 select $BQ_DATASET.kll_sketch_float_to_string(sketch) from 
$BQ_DATASET.kll_sketch;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to