This is an automated email from the ASF dual-hosted git repository.
jark pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluss.git
The following commit(s) were added to refs/heads/main by this push:
new 18bda48ce [docs] Improve aggregation.md with complete SQL examples for
rbm32/rbm64 (#2643)
18bda48ce is described below
commit 18bda48ce6be91a1c84638b512db3fba92100767
Author: Yang Wang <[email protected]>
AuthorDate: Wed Feb 11 14:43:49 2026 +0800
[docs] Improve aggregation.md with complete SQL examples for rbm32/rbm64
(#2643)
---
.../docs/table-design/merge-engines/aggregation.md | 100 +++++++++++++++++----
1 file changed, 81 insertions(+), 19 deletions(-)
diff --git a/website/docs/table-design/merge-engines/aggregation.md
b/website/docs/table-design/merge-engines/aggregation.md
index 7d585f210..50ded6638 100644
--- a/website/docs/table-design/merge-engines/aggregation.md
+++ b/website/docs/table-design/merge-engines/aggregation.md
@@ -121,7 +121,8 @@ INSERT INTO product_stats VALUES
### Querying Results
```sql
-SELECT * FROM product_stats;
+-- Point query by primary key
+SELECT * FROM product_stats WHERE product_id = 1;
```
**Result after aggregation:**
@@ -232,7 +233,7 @@ INSERT INTO test_sum VALUES
(1, 100.50),
(1, 200.75);
-SELECT * FROM test_sum;
+SELECT * FROM test_sum WHERE id = 1;
+------------+---------+
| id | amount |
+------------+---------+
@@ -287,14 +288,18 @@ INSERT INTO test_product VALUES
(1, 0.9),
(1, 0.8);
-SELECT * FROM test_product;
-+------------+---------+
-| id | amount |
-+------------+---------+
-| 1 | 0.72 |
-+------------+---------+
+SELECT * FROM test_product WHERE id = 1;
++------------+-----------------------+
+| id | discount_factor |
++------------+-----------------------+
+| 1 | 0.7200000000000001 |
++------------+-----------------------+
```
+:::note
+The result `0.7200000000000001` instead of `0.72` is expected behavior due to
IEEE 754 double-precision floating-point arithmetic. If exact precision is
required, consider using `DECIMAL` type instead of `DOUBLE`.
+:::
+
</TabItem>
<TabItem value="java-client" label="Java Client">
@@ -311,7 +316,8 @@ TableDescriptor.builder()
.build();
// Input: (1, 0.9), (1, 0.8)
-// Result: (1, 0.72) -- 90% * 80% = 72%
+// Result: (1, 0.7200000000000001) -- due to IEEE 754 floating-point arithmetic
+// Use DECIMAL type if exact precision is needed
```
</TabItem>
@@ -345,7 +351,7 @@ INSERT INTO test_max VALUES
(1, 25.5, TIMESTAMP '2024-01-01 10:00:00'),
(1, 28.3, TIMESTAMP '2024-01-01 11:00:00');
-SELECT * FROM test_max;
+SELECT * FROM test_max WHERE id = 1;
+------------+----------------+---------------------+
| id | temperature | reading_time |
+------------+----------------+---------------------+
@@ -402,7 +408,7 @@ INSERT INTO test_min VALUES
(1, 79.99),
(1, 89.99);
-SELECT * FROM test_min;
+SELECT * FROM test_min WHERE id = 1;
+------------+--------------+
| id | lowest_price |
+------------+--------------+
@@ -462,7 +468,7 @@ INSERT INTO test_last_value VALUES
(1, 'offline', TIMESTAMP '2024-01-01 11:00:00'),
(1, null, TIMESTAMP '2024-01-01 12:00:00'); -- Null overwrites previous
'offline' value
-SELECT * FROM test_last_value;
+SELECT * FROM test_last_value WHERE id = 1;
+------------+---------+---------------------+
| id | status | last_login |
+------------+---------+---------------------+
@@ -534,7 +540,7 @@ INSERT INTO test_last_value_ignore_nulls VALUES
(1, null, '789-012'), -- Null is ignored, email retains previous value
(1, '[email protected]', null);
-SELECT * FROM test_last_value_ignore_nulls;
+SELECT * FROM test_last_value_ignore_nulls WHERE id = 1;
+------------+-------------------+---------+
| id | email | phone |
+------------+-------------------+---------+
@@ -606,7 +612,7 @@ INSERT INTO test_first_value VALUES
(1, '2024-01-01', 'ProductA'),
(1, '2024-02-01', 'ProductB'); -- Ignored, first value retained
-SELECT * FROM test_first_value;
+SELECT * FROM test_first_value WHERE id = 1;
+------------+---------------------+---------------+
| id | first_purchase_date | first_product |
+------------+---------------------+---------------+
@@ -667,7 +673,7 @@ INSERT INTO test_first_value_ignore_nulls VALUES
(1, '[email protected]', '2024-01-01 10:00:00'),
(1, '[email protected]', '2024-01-02 10:00:00'); -- Only the first
non-null value is retained
-SELECT * FROM test_first_value_ignore_nulls;
+SELECT * FROM test_first_value_ignore_nulls WHERE id = 1;
+------------+-------------------+---------------------+
| id | email | verified_at |
+------------+-------------------+---------------------+
@@ -729,7 +735,7 @@ INSERT INTO test_listagg VALUES
(1, 'java', 'java'),
(1, 'flink', 'flink');
-SELECT * FROM test_listagg;
+SELECT * FROM test_listagg WHERE id = 1;
+------------+-----------------------+-----------------------+
| id | tags1 | tags2 |
+------------+-----------------------+-----------------------+
@@ -791,7 +797,7 @@ INSERT INTO test_string_agg VALUES
(1, 'java', 'java'),
(1, 'flink', 'flink');
-SELECT * FROM test_string_agg;
+SELECT * FROM test_string_agg WHERE id = 1;
+------------+-----------------------+-----------------------+
| id | tags1 | tags2 |
+------------+-----------------------+-----------------------+
@@ -848,8 +854,22 @@ CREATE TABLE user_visits (
'table.merge-engine' = 'aggregation',
'fields.visit_bitmap.agg' = 'rbm32'
);
+
+-- Insert serialized RoaringBitmap values as hex literals
+-- Bitmap {1,2}
+INSERT INTO user_visits VALUES (1,
x'3A30000001000000000001001000000001000200');
+-- Bitmap {2,3}
+INSERT INTO user_visits VALUES (1,
x'3A30000001000000000001001000000002000300');
+
+SELECT * FROM user_visits WHERE user_id = 1;
+-- Result: visit_bitmap contains the union {1,2,3}
+-- (serialized as x'3A300000010000000000020010000000010002000300')
```
+:::note
+RoaringBitmap values must be pre-serialized on the client side. The hex
literals above represent bitmaps serialized using the
[RoaringBitmap](https://github.com/RoaringBitmap/RoaringBitmap) library's
standard format.
+:::
+
</TabItem>
<TabItem value="java-client" label="Java Client">
@@ -859,6 +879,20 @@ Schema schema = Schema.newBuilder()
.column("visit_bitmap", DataTypes.BYTES(), AggFunctions.RBM32())
.primaryKey("user_id")
.build();
+
+TableDescriptor.builder()
+ .schema(schema)
+ .property("table.merge-engine", "aggregation")
+ .build();
+
+// Serialize bitmaps using the RoaringBitmap library
+// RoaringBitmap rbm1 = RoaringBitmap.bitmapOf(1, 2);
+// byte[] bytes1 = serialize(rbm1);
+// RoaringBitmap rbm2 = RoaringBitmap.bitmapOf(2, 3);
+// byte[] bytes2 = serialize(rbm2);
+
+// Input: (1, bitmap{1,2}), (1, bitmap{2,3})
+// Result: (1, bitmap{1,2,3}) -- union of the two bitmaps
```
</TabItem>
@@ -885,8 +919,22 @@ CREATE TABLE session_interactions (
'table.merge-engine' = 'aggregation',
'fields.interaction_bitmap.agg' = 'rbm64'
);
+
+-- Insert serialized Roaring64Bitmap values as hex literals
+-- Bitmap {10,20}
+INSERT INTO session_interactions VALUES (1,
x'01010000000000000004000000000000000000000000000000000001000000FE0100000001020200000
00A00140001000000000000000000000000000000');
+-- Bitmap {20,30}
+INSERT INTO session_interactions VALUES (1,
x'01010000000000000004000000000000000000000000000000000001000000FE01000000010202000000
14001E0001000000000000000000000000000000');
+
+SELECT * FROM session_interactions WHERE session_id = 1;
+-- Result: interaction_bitmap contains the union {10,20,30}
+-- (serialized as
x'01010000000000000004000000000000000000000000000000000001000000FE010000000102030000000A0014001E0001000000000000000000000000000000')
```
+:::note
+Roaring64Bitmap values must be pre-serialized on the client side. The hex
literals above represent bitmaps serialized using the
[RoaringBitmap](https://github.com/RoaringBitmap/RoaringBitmap) library's
64-bit format.
+:::
+
</TabItem>
<TabItem value="java-client" label="Java Client">
@@ -896,6 +944,20 @@ Schema schema = Schema.newBuilder()
.column("interaction_bitmap", DataTypes.BYTES(), AggFunctions.RBM64())
.primaryKey("session_id")
.build();
+
+TableDescriptor.builder()
+ .schema(schema)
+ .property("table.merge-engine", "aggregation")
+ .build();
+
+// Serialize bitmaps using the RoaringBitmap library
+// Roaring64Bitmap rbm1 = Roaring64Bitmap.bitmapOf(10, 20);
+// byte[] bytes1 = serialize(rbm1);
+// Roaring64Bitmap rbm2 = Roaring64Bitmap.bitmapOf(20, 30);
+// byte[] bytes2 = serialize(rbm2);
+
+// Input: (1, bitmap{10,20}), (1, bitmap{20,30})
+// Result: (1, bitmap{10,20,30}) -- union of the two bitmaps
```
</TabItem>
@@ -928,7 +990,7 @@ INSERT INTO test_bool_and VALUES
(1, true),
(1, false);
-SELECT * FROM test_bool_and;
+SELECT * FROM test_bool_and WHERE id = 1;
+------------+----------------------+
| id | has_all_permissions |
+------------+----------------------+
@@ -985,7 +1047,7 @@ INSERT INTO test_bool_or VALUES
(1, false),
(1, true);
-SELECT * FROM test_bool_or;
+SELECT * FROM test_bool_or WHERE id = 1;
+------------+------------------+
| id | has_any_alert |
+------------+------------------+