This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch update_readme in repository https://gitbox.apache.org/repos/asf/datasketches-bigquery.git
commit 0fa1cbbf0eadef725fd6a643162cf637b37e3cd0 Author: AlexanderSaydakov <[email protected]> AuthorDate: Fri Feb 14 16:01:10 2025 -0800 regenerated readme using the latest generator --- cpc/README.md | 2 ++ fi/README.md | 2 ++ hll/README.md | 2 ++ kll/README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ req/README.md | 2 ++ tdigest/README.md | 2 ++ theta/README.md | 2 ++ 7 files changed, 54 insertions(+) diff --git a/cpc/README.md b/cpc/README.md index 0d34db7..9f2126c 100644 --- a/cpc/README.md +++ b/cpc/README.md @@ -152,6 +152,8 @@ Computes a sketch that represents the scalar union of the two given sketches. * Returns: a CPC Sketch, as BYTES. ## Examples + +### [test/cpc_sketch_test.sql](../cpc/test/cpc_sketch_test.sql) ```sql # using defaults diff --git a/fi/README.md b/fi/README.md index 322569f..a24d9d0 100644 --- a/fi/README.md +++ b/fi/README.md @@ -72,6 +72,8 @@ If NULL, the maximum error of the sketch is used as a threshold. * Returns: an array of frequent items with frequency estimates, lower and upper bounds. ## Examples + +### [test/frequent_strings_sketch_test.sql](../fi/test/frequent_strings_sketch_test.sql) ```sql select bqutil.datasketches.frequent_strings_sketch_to_string(bqutil.datasketches.frequent_strings_sketch_build(str, 1, 5)) from unnest(["a", "b", "c"]) as str; diff --git a/hll/README.md b/hll/README.md index f2495a6..5fae23f 100644 --- a/hll/README.md +++ b/hll/README.md @@ -123,6 +123,8 @@ Computes a sketch that represents the union of the two given sketches. * Returns: an HLL Sketch, as BYTES. ## Examples + +### [test/hll_sketch_test.sql](../hll/test/hll_sketch_test.sql) ```sql # expected 3 diff --git a/kll/README.md b/kll/README.md index 35484e8..f751559 100644 --- a/kll/README.md +++ b/kll/README.md @@ -183,6 +183,48 @@ Returns a value from the sketch that is the best approximation to a value from t * Returns: an approximate quantile associated with the given rank. 
## Examples + +### [test/kll_sketch_example.sql](../kll/test/kll_sketch_example.sql) +```sql + +# Creating sample data with 1 million records split into 100 groups of nearly equal size + +CREATE OR REPLACE TEMP TABLE sample_data AS +SELECT + CONCAT("group_key_", CAST(RAND() * 100 AS INT64)) as group_key, + RAND() AS x +FROM + UNNEST(GENERATE_ARRAY(1, 1000000)); + +# Creating KLL merge sketches for a group key + +CREATE OR REPLACE TEMP TABLE agg_sample_data AS +SELECT + group_key, + count(*) AS total_count, + bqutil.datasketches.kll_sketch_float_build_k(x, 250) AS kll_sketch +FROM sample_data +GROUP BY group_key; + +# Merge group based sketches into a single sketch and then get approx quantiles + +WITH agg_data AS ( + SELECT + bqutil.datasketches.kll_sketch_float_merge_k(kll_sketch, 250) as merged_kll_sketch, + SUM(total_count) as total_count + FROM agg_sample_data +) +SELECT + bqutil.datasketches.kll_sketch_float_get_quantile(merged_kll_sketch, 0.0, true) AS minimum, + bqutil.datasketches.kll_sketch_float_get_quantile(merged_kll_sketch, 0.5, true) AS p50, + bqutil.datasketches.kll_sketch_float_get_quantile(merged_kll_sketch, 0.75, true) AS p75, + bqutil.datasketches.kll_sketch_float_get_quantile(merged_kll_sketch, 0.95, true) AS p95, + bqutil.datasketches.kll_sketch_float_get_quantile(merged_kll_sketch, 1.0, true) AS maximum, + total_count +FROM agg_data; +``` + +### [test/kll_sketch_test.sql](../kll/test/kll_sketch_test.sql) ```sql create or replace temp table kll_sketch(sketch bytes); diff --git a/req/README.md b/req/README.md index 4b8969a..422fa93 100644 --- a/req/README.md +++ b/req/README.md @@ -186,6 +186,8 @@ Returns an approximation to the normalized rank, on the interval \[0.0, 1.0\], o * Returns: an approximate rank of the given value. 
## Examples + +### [test/req_sketch_float_test.sql](../req/test/req_sketch_float_test.sql) ```sql # using defaults diff --git a/tdigest/README.md b/tdigest/README.md index 39af672..e5522f7 100644 --- a/tdigest/README.md +++ b/tdigest/README.md @@ -105,6 +105,8 @@ Returns a value from the sketch that is the best approximation to a value from t * Returns: an approximate quantile associated with the given rank. ## Examples + +### [test/tdigest_double_test.sql](../tdigest/test/tdigest_double_test.sql) ```sql create or replace temp table tdigest_double(sketch bytes); diff --git a/theta/README.md b/theta/README.md index 6c831c4..8497287 100644 --- a/theta/README.md +++ b/theta/README.md @@ -241,6 +241,8 @@ Computes a sketch that represents the scalar union of the two given sketches. * Returns: a Compact, Compressed Theta Sketch, as BYTES. ## Examples + +### [test/theta_sketch_test.sql](../theta/test/theta_sketch_test.sql) ```sql # using defaults --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
