This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
new 2cd1fb83ae [function](bitmap) support bitmap_hash64 (#12992) (#14663)
2cd1fb83ae is described below
commit 2cd1fb83ae665517d97158cca0736e28d5a39cdd
Author: TengJianPing <[email protected]>
AuthorDate: Tue Nov 29 18:35:24 2022 +0800
[function](bitmap) support bitmap_hash64 (#12992) (#14663)
---
be/src/exprs/bitmap_function.cpp | 10 +
be/src/exprs/bitmap_function.h | 1 +
be/src/vec/functions/function_bitmap.cpp | 44 +++-
.../aggregate-functions/bitmap_union.md | 149 ++++++++++++
.../bitmap-functions/bitmap_hash64.md | 52 +++++
.../sql-manual/sql-reference/Data-Types/BITMAP.md | 48 ++++
.../aggregate-functions/bitmap_union.md | 148 ++++++++++++
.../bitmap-functions/bitmap_hash64.md | 52 +++++
.../sql-manual/sql-reference/Data-Types/BITMAP.md | 48 ++++
gensrc/script/doris_builtins_functions.py | 6 +
.../data/datatype_p0/bitmap/test_bitmap_int.out | Bin 0 -> 315 bytes
.../bitmap_functions/test_bitmap_function.out | 251 +++++++++++++++++++++
.../datatype_p0/bitmap/test_bitmap_int.groovy | 49 ++++
.../bitmap_functions/test_bitmap_function.groovy | 187 +++++++++++++++
14 files changed, 1037 insertions(+), 8 deletions(-)
diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 5a64a129ac..26c756bca8 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -194,6 +194,16 @@ StringVal
BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx,
}
return serialize(ctx, &bitmap);
}
+StringVal BitmapFunctions::bitmap_hash64(doris_udf::FunctionContext* ctx, // returns a serialized bitmap holding the 64-bit hash of src
+ const doris_udf::StringVal& src) {
+ BitmapValue bitmap; // nothing is added to it when src is NULL
+ if (!src.is_null) {
+ uint64_t hash_value = 0; // output slot filled in by the hash call below
+ murmur_hash3_x64_64(src.ptr, src.len, 0, &hash_value); // NOTE(review): third argument (0) is presumably the seed - confirm against the helper's declaration
+ bitmap.add(hash_value);
+ }
+ return serialize(ctx, &bitmap); // serialized even for NULL input, so the result itself is never NULL
+}
StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const
StringVal& src) {
if (src.is_null) {
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index 79bb3aefe6..570b4c7615 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -70,6 +70,7 @@ public:
static StringVal to_bitmap(FunctionContext* ctx, const StringVal& src);
static StringVal to_bitmap_with_check(FunctionContext* ctx, const
StringVal& src);
static StringVal bitmap_hash(FunctionContext* ctx, const StringVal& src);
+ static StringVal bitmap_hash64(FunctionContext* ctx, const StringVal& src);
static StringVal bitmap_or(FunctionContext* ctx, const StringVal& src,
const StringVal& dst);
static StringVal bitmap_xor(FunctionContext* ctx, const StringVal& src,
const StringVal& dst);
static StringVal bitmap_and(FunctionContext* ctx, const StringVal& src,
const StringVal& dst);
diff --git a/be/src/vec/functions/function_bitmap.cpp
b/be/src/vec/functions/function_bitmap.cpp
index a49daeff6e..f4018fe52b 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -188,8 +188,22 @@ public:
}
};
-struct BitmapHash {
+template <int HashBits>
+struct BitmapHashName {}; // primary template deliberately has no `name`; only the widths specialized below provide one
+
+template <>
+struct BitmapHashName<32> { // SQL name used by the 32-bit hash variant
static constexpr auto name = "bitmap_hash";
+};
+
+template <>
+struct BitmapHashName<64> { // SQL name used by the 64-bit hash variant
+ static constexpr auto name = "bitmap_hash64";
+};
+
+template <int HashBits>
+struct BitmapHash {
+ static constexpr auto name = BitmapHashName<HashBits>::name;
using ReturnType = DataTypeBitMap;
@@ -202,9 +216,15 @@ struct BitmapHash {
for (size_t i = 0; i < size; ++i) {
const char* raw_str = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1] - 1;
- uint32_t hash_value =
- HashUtil::murmur_hash3_32(raw_str, str_size,
HashUtil::MURMUR3_32_SEED);
- res_data[i].add(hash_value);
+ if constexpr (HashBits == 32) {
+ uint32_t hash_value =
+ HashUtil::murmur_hash3_32(raw_str, str_size,
HashUtil::MURMUR3_32_SEED);
+ res_data[i].add(hash_value);
+ } else {
+ uint64_t hash_value = 0;
+ murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
+ res_data[i].add(hash_value);
+ }
}
}
@@ -221,9 +241,15 @@ struct BitmapHash {
} else {
const char* raw_str = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1] - 1;
- uint32_t hash_value =
- HashUtil::murmur_hash3_32(raw_str, str_size,
HashUtil::MURMUR3_32_SEED);
- res_data[i].add(hash_value);
+ if constexpr (HashBits == 32) {
+ uint32_t hash_value =
+ HashUtil::murmur_hash3_32(raw_str, str_size,
HashUtil::MURMUR3_32_SEED);
+ res_data[i].add(hash_value);
+ } else {
+ uint64_t hash_value = 0;
+ murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
+ res_data[i].add(hash_value);
+ }
}
}
}
@@ -560,7 +586,8 @@ using FunctionBitmapEmpty = FunctionConst<BitmapEmpty,
false>;
using FunctionToBitmap = FunctionAlwaysNotNullable<ToBitmap>;
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck,
true>;
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
-using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash>;
+using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
+using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
using FunctionBitmapMin = FunctionBitmapSingle<FunctionBitmapMinImpl>;
using FunctionBitmapMax = FunctionBitmapSingle<FunctionBitmapMaxImpl>;
@@ -589,6 +616,7 @@ void register_function_bitmap(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionToBitmapWithCheck>();
factory.register_function<FunctionBitmapFromString>();
factory.register_function<FunctionBitmapHash>();
+ factory.register_function<FunctionBitmapHash64>();
factory.register_function<FunctionBitmapCount>();
factory.register_function<FunctionBitmapMin>();
factory.register_function<FunctionBitmapMax>();
diff --git
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
new file mode 100644
index 0000000000..8afe22c5be
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
@@ -0,0 +1,149 @@
+---
+{
+ "title": "BITMAP_UNION",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+## BITMAP_UNION
+
+### description
+
+### example
+
+#### Create table
+
+The aggregation model needs to be used when creating the table. The data type
is bitmap and the aggregation function is bitmap_union.
+```
+CREATE TABLE `pv_bitmap` (
+ `dt` int (11) NULL COMMENT" ",
+ `page` varchar (10) NULL COMMENT" ",
+ `user_id` bitmap BITMAP_UNION NULL COMMENT" "
+) ENGINE = OLAP
+AGGREGATE KEY (`dt`, `page`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH (`dt`) BUCKETS 2;
+```
+
+Note: When the amount of data is large, it is best to create a corresponding
rollup table for high-frequency bitmap_union queries
+
+```
+ALTER TABLE pv_bitmap ADD ROLLUP pv (page, user_id);
+```
+
+#### Data Load
+
+`TO_BITMAP (expr)`: Convert 0 ~ 18446744073709551615 unsigned bigint to bitmap
+
+`BITMAP_EMPTY ()`: Generate empty bitmap columns, used for insert or import to
fill the default value
+
+`BITMAP_HASH (expr)` or `BITMAP_HASH64 (expr)`: Convert any type of column to
a bitmap by hashing
+
+##### Stream Load
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,user_id, user_id=to_bitmap(user_id)"
http://host:8410/api/test/testDb/_stream_load
+```
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,user_id, user_id=bitmap_hash(user_id)"
http://host:8410/api/test/testDb/_stream_load
+```
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,user_id, user_id=bitmap_empty()"
http://host:8410/api/test/testDb/_stream_load
+```
+
+##### Insert Into
+
+id2's column type is bitmap
+```
+insert into bitmap_table1 select id, id2 from bitmap_table2;
+```
+
+id2's column type is bitmap
+```
+INSERT INTO bitmap_table1 (id, id2) VALUES (1001, to_bitmap (1000)), (1001,
to_bitmap (2000));
+```
+
+id2's column type is bitmap
+```
+insert into bitmap_table1 select id, bitmap_union (id2) from bitmap_table2
group by id;
+```
+
+id2's column type is int
+```
+insert into bitmap_table1 select id, to_bitmap (id2) from table;
+```
+
+id2's column type is String
+```
+insert into bitmap_table1 select id, bitmap_hash (id_string) from table;
+```
+
+
+#### Data Query
+
+##### Syntax
+
+
+`BITMAP_UNION (expr)`: Calculate the union of the input Bitmaps. The return
value is a new Bitmap value.
+
+`BITMAP_UNION_COUNT (expr)`: Calculate the cardinality of the union of the
input Bitmaps, equivalent to BITMAP_COUNT (BITMAP_UNION (expr)). Prefer
BITMAP_UNION_COUNT, because it performs better than
BITMAP_COUNT (BITMAP_UNION (expr)).
+
+`BITMAP_UNION_INT (expr)`: Count the number of distinct values in columns
of type TINYINT, SMALLINT and INT. The return value is the same as COUNT (DISTINCT expr).
+
+`INTERSECT_COUNT (bitmap_column_to_count, filter_column, filter_values
...)`: Calculate the cardinality of the intersection of the multiple bitmaps
+that satisfy the filter_column filter
conditions.
+bitmap_column_to_count is a column of type bitmap, filter_column is a column
of varying dimensions, and filter_values is a list of dimension values.
+
+##### Example
+
+The following SQL uses the pv_bitmap table above as an example:
+
+Calculate the deduplication value for user_id:
+
+```
+select bitmap_union_count (user_id) from pv_bitmap;
+
+select bitmap_count (bitmap_union (user_id)) from pv_bitmap;
+```
+
+Calculate the deduplication value of id:
+
+```
+select bitmap_union_int (id) from pv_bitmap;
+```
+
+Calculate the retention of user_id:
+
+```
+select intersect_count (user_id, page, 'meituan') as meituan_uv,
+intersect_count (user_id, page, 'waimai') as waimai_uv,
+intersect_count (user_id, page, 'meituan', 'waimai') as retention // Number of
users appearing on both 'meituan' and 'waimai' pages
+from pv_bitmap
+where page in ('meituan', 'waimai');
+```
+
+### keywords
+
+BITMAP, BITMAP_COUNT, BITMAP_EMPTY, BITMAP_UNION, BITMAP_UNION_INT, TO_BITMAP,
BITMAP_UNION_COUNT, INTERSECT_COUNT
diff --git
a/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_hash64.md
b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_hash64.md
new file mode 100644
index 0000000000..e633df9b94
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_hash64.md
@@ -0,0 +1,52 @@
+---
+{
+ "title": "bitmap_hash64",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## bitmap_hash64
+### description
+#### Syntax
+
+`BITMAP BITMAP_HASH64(expr)`
+
+Computes the 64-bit hash value of an expression of any type and returns a bitmap
containing that hash value. It is mainly used to load non-integer values into a
bitmap column, e.g.,
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,device_id, device_id=bitmap_hash64(device_id)"
http://host:8410/api/test/testDb/_stream_load
+```
+
+### example
+
+```
+mysql> select bitmap_count(bitmap_hash64('hello'));
++--------------------------------------+
+| bitmap_count(bitmap_hash64('hello')) |
++--------------------------------------+
+|                                    1 |
++--------------------------------------+
+```
+
+### keywords
+
+ BITMAP_HASH64,BITMAP_HASH,BITMAP
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
new file mode 100644
index 0000000000..c66b29424a
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
@@ -0,0 +1,48 @@
+---
+{
+ "title": "BITMAP",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## BITMAP
+### Description
+BITMAP
+
+BITMAP cannot be used as a key column, and the aggregation type is
BITMAP_UNION when building the table.
+The user does not need to specify the length and default value. The length is
controlled within the system according to the degree of data aggregation.
+And the BITMAP column can only be queried or used by supporting functions such
as bitmap_union_count, bitmap_union, bitmap_hash and bitmap_hash64.
+
+The use of BITMAP in offline scenarios will affect the import speed. In the
case of a large amount of data, the query speed will be slower than HLL and
better than Count Distinct.
+Note: If BITMAP does not use a global dictionary in real-time scenarios, using
bitmap_hash() may cause an error of about one-thousandth. If the error rate is
not tolerable, bitmap_hash64 can be used instead.
+
+### example
+
+ select hour, BITMAP_UNION_COUNT(pv) over(order by hour) uv from(
+ select hour, BITMAP_UNION(device_id) as pv
+ from metric_table -- Query the accumulated UV per hour
+ where datekey=20200922
+ group by hour order by 1
+ ) final;
+
+### keywords
+BITMAP
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
new file mode 100644
index 0000000000..81b8c0c9be
--- /dev/null
+++
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/bitmap_union.md
@@ -0,0 +1,148 @@
+---
+{
+ "title": "BITMAP_UNION",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## BITMAP_UNION
+
+### description
+
+### example
+
+#### Create table
+
+建表时需要使用聚合模型,数据类型是 bitmap , 聚合函数是 bitmap_union
+
+```
+CREATE TABLE `pv_bitmap` (
+ `dt` int(11) NULL COMMENT "",
+ `page` varchar(10) NULL COMMENT "",
+ `user_id` bitmap BITMAP_UNION NULL COMMENT ""
+) ENGINE=OLAP
+AGGREGATE KEY(`dt`, `page`)
+COMMENT "OLAP"
+DISTRIBUTED BY HASH(`dt`) BUCKETS 2;
+```
+注:当数据量很大时,最好为高频率的 bitmap_union 查询建立对应的 rollup 表
+
+```
+ALTER TABLE pv_bitmap ADD ROLLUP pv (page, user_id);
+```
+
+#### Data Load
+
+`TO_BITMAP(expr)` : 将 0 ~ 18446744073709551615 的 unsigned bigint 转为 bitmap
+
+`BITMAP_EMPTY()`: 生成空 bitmap 列,用于 insert 或导入时填充默认值
+
+`BITMAP_HASH(expr)`或者`BITMAP_HASH64(expr)`: 将任意类型的列通过 Hash 的方式转为 bitmap
+
+##### Stream Load
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,user_id, user_id=to_bitmap(user_id)"
http://host:8410/api/test/testDb/_stream_load
+```
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,user_id, user_id=bitmap_hash(user_id)"
http://host:8410/api/test/testDb/_stream_load
+```
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,user_id, user_id=bitmap_empty()"
http://host:8410/api/test/testDb/_stream_load
+```
+
+##### Insert Into
+
+id2 的列类型是 bitmap
+```
+insert into bitmap_table1 select id, id2 from bitmap_table2;
+```
+
+id2 的列类型是 bitmap
+```
+INSERT INTO bitmap_table1 (id, id2) VALUES (1001, to_bitmap(1000)), (1001,
to_bitmap(2000));
+```
+
+id2 的列类型是 bitmap
+```
+insert into bitmap_table1 select id, bitmap_union(id2) from bitmap_table2
group by id;
+```
+
+id2 的列类型是 int
+```
+insert into bitmap_table1 select id, to_bitmap(id2) from table;
+```
+
+id2 的列类型是 String
+```
+insert into bitmap_table1 select id, bitmap_hash(id_string) from table;
+```
+
+#### Data Query
+##### Syntax
+
+
+`BITMAP_UNION(expr)` : 计算输入 Bitmap 的并集,返回新的bitmap
+
+`BITMAP_UNION_COUNT(expr)`: 计算输入 Bitmap 的并集,返回其基数,和
BITMAP_COUNT(BITMAP_UNION(expr)) 等价。目前推荐优先使用 BITMAP_UNION_COUNT ,其性能优于
BITMAP_COUNT(BITMAP_UNION(expr))
+
+`BITMAP_UNION_INT(expr)` : 计算 TINYINT,SMALLINT 和 INT 类型的列中不同值的个数,返回值和
+COUNT(DISTINCT expr) 相同
+
+`INTERSECT_COUNT(bitmap_column_to_count, filter_column, filter_values ...)` :
计算满足
+filter_column 过滤条件的多个 bitmap 的交集的基数值。
+bitmap_column_to_count 是 bitmap 类型的列,filter_column 是变化的维度列,filter_values
是维度取值列表
+
+
+##### Example
+
+下面的 SQL 以上面的 pv_bitmap table 为例:
+
+计算 user_id 的去重值:
+
+```
+select bitmap_union_count(user_id) from pv_bitmap;
+
+select bitmap_count(bitmap_union(user_id)) from pv_bitmap;
+```
+
+计算 id 的去重值:
+
+```
+select bitmap_union_int(id) from pv_bitmap;
+```
+
+计算 user_id 的 留存:
+
+```
+select intersect_count(user_id, page, 'meituan') as meituan_uv,
+intersect_count(user_id, page, 'waimai') as waimai_uv,
+intersect_count(user_id, page, 'meituan', 'waimai') as retention //在 'meituan'
和 'waimai' 两个页面都出现的用户数
+from pv_bitmap
+where page in ('meituan', 'waimai');
+```
+
+### keywords
+
+BITMAP,BITMAP_COUNT,BITMAP_EMPTY,BITMAP_UNION,BITMAP_UNION_INT,TO_BITMAP,BITMAP_UNION_COUNT,INTERSECT_COUNT
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_hash64.md
b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_hash64.md
new file mode 100644
index 0000000000..38c6bf22b7
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_hash64.md
@@ -0,0 +1,52 @@
+---
+{
+ "title": "bitmap_hash64",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## bitmap_hash64
+### description
+#### Syntax
+
+`BITMAP BITMAP_HASH64(expr)`
+
+对任意类型的输入计算64位的哈希值,返回包含该哈希值的bitmap。主要用于stream load任务将非整型字段导入Doris表的bitmap字段。例如
+
+```
+cat data | curl --location-trusted -u user:passwd -T - -H "columns:
dt,page,device_id, device_id=bitmap_hash64(device_id)"
http://host:8410/api/test/testDb/_stream_load
+```
+
+### example
+
+```
+mysql> select bitmap_count(bitmap_hash64('hello'));
++--------------------------------------+
+| bitmap_count(bitmap_hash64('hello')) |
++--------------------------------------+
+|                                    1 |
++--------------------------------------+
+```
+
+### keywords
+
+ BITMAP_HASH64,BITMAP_HASH,BITMAP
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
new file mode 100644
index 0000000000..f469615ab1
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Types/BITMAP.md
@@ -0,0 +1,48 @@
+---
+{
+ "title": "BITMAP",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## BITMAP
+### description
+ BITMAP
+ BITMAP不能作为key列使用,建表时配合聚合类型为BITMAP_UNION。
+ 用户不需要指定长度和默认值。长度根据数据的聚合程度系统内控制。
+
并且BITMAP列只能通过配套的bitmap_union_count、bitmap_union、bitmap_hash、bitmap_hash64等函数进行查询或使用。
+
+ 离线场景下使用BITMAP会影响导入速度,在数据量大的情况下查询速度会慢于HLL,并优于Count Distinct。
+
注意:实时场景下BITMAP如果不使用全局字典,使用了bitmap_hash()可能会导致有千分之一左右的误差。如果这个误差不可接受,可以使用bitmap_hash64。
+
+### example
+
+ select hour, BITMAP_UNION_COUNT(pv) over(order by hour) uv from(
+ select hour, BITMAP_UNION(device_id) as pv
+ from metric_table -- 查询每小时的累计UV
+ where datekey=20200622
+ group by hour order by 1
+ ) final;
+
+### keywords
+
+ BITMAP
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 8cce6f2ade..36a94c39a0 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1144,6 +1144,9 @@ visible_functions = [
[['bitmap_hash'], 'BITMAP', ['VARCHAR'],
'_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
+ [['bitmap_hash64'], 'BITMAP', ['VARCHAR'],  # symbol below: Itanium-mangled doris::BitmapFunctions::bitmap_hash64; "13" is the length of the identifier "bitmap_hash64"
+
'_ZN5doris15BitmapFunctions13bitmap_hash64EPN9doris_udf15FunctionContextERKNS1_9StringValE',
+ '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['to_bitmap'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions9to_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
@@ -1151,6 +1154,9 @@ visible_functions = [
'_ZN5doris15BitmapFunctions20to_bitmap_with_checkEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['bitmap_hash'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions11bitmap_hashEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
+ [['bitmap_hash64'], 'BITMAP', ['STRING'],  # added after the unchanged bitmap_hash entry so each name keeps its own symbol; "13" is the length of "bitmap_hash64"
+
'_ZN5doris15BitmapFunctions13bitmap_hash64EPN9doris_udf15FunctionContextERKNS1_9StringValE',
+ '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
[['bitmap_count'], 'BIGINT', ['BITMAP'],
diff --git a/regression-test/data/datatype_p0/bitmap/test_bitmap_int.out
b/regression-test/data/datatype_p0/bitmap/test_bitmap_int.out
new file mode 100644
index 0000000000..a8066c8eda
Binary files /dev/null and
b/regression-test/data/datatype_p0/bitmap/test_bitmap_int.out differ
diff --git
a/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out
b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out
new file mode 100644
index 0000000000..6661a40fb8
--- /dev/null
+++
b/regression-test/data/query_p0/sql_functions/bitmap_functions/test_bitmap_function.out
@@ -0,0 +1,251 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+0
+
+-- !sql --
+1
+
+-- !sql --
+1
+
+-- !sql --
+1,2
+
+-- !sql --
+
+
+-- !sql --
+\N
+
+-- !sql --
+false
+
+-- !sql --
+true
+
+-- !sql --
+0
+
+-- !sql --
+
+
+-- !sql --
+0,1,2
+
+-- !sql --
+\N
+
+-- !sql --
+false
+
+-- !sql --
+true
+
+-- !sql --
+true
+
+-- !sql --
+false
+
+-- !sql_bitmap_hash1 --
+1
+
+-- !sql_bitmap_hash2 --
+1
+
+-- !sql_bitmap_hash3 --
+0
+
+-- !sql_bitmap_hash64_1 --
+1
+
+-- !sql_bitmap_hash64_2 --
+1
+
+-- !sql_bitmap_hash64_3 --
+0
+
+-- !sql --
+2
+
+-- !sql --
+1
+
+-- !sql --
+1,2
+
+-- !sql --
+\N
+
+-- !sql --
+0,1,2,10
+
+-- !sql --
+1,2,3,4,5,10
+
+-- !sql --
+0
+
+-- !sql --
+3
+
+-- !sql --
+1
+
+-- !sql --
+2
+
+-- !sql --
+0
+
+-- !sql --
+\N
+
+-- !sql --
+3
+
+-- !sql --
+3
+
+-- !sql --
+5
+
+-- !sql --
+6
+
+-- !sql --
+\N
+
+-- !sql --
+2
+
+-- !sql --
+1,4
+
+-- !sql --
+1,3,5
+
+-- !sql --
+1,3,5
+
+-- !sql --
+\N
+
+-- !sql --
+4
+
+-- !sql --
+0
+
+-- !sql --
+6
+
+-- !sql --
+3
+
+-- !sql --
+3
+
+-- !sql --
+\N
+
+-- !sql --
+0
+
+-- !sql --
+5
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+1,2,3,4,5
+
+-- !sql --
+2
+
+-- !sql --
+1,2,3
+
+-- !sql --
+4,5
+
+-- !sql --
+0,1,2
+
+-- !sql --
+2,3
+
+-- !sql --
+2,3,5
+
+-- !sql --
+\N
+
+-- !sql --
+
+
+-- !sql --
+1
+
+-- !sql --
+1,2
+
+-- !sql --
+1 \N
+2 \N
+
+-- !sql --
+1 3
+2 2
+
+-- !sql --
+1 3
+2 2
+
+-- !sql --
+2
+
+-- !sql --
+1,4
+
+-- !sql --
+1,3,5
+
+-- !sql --
+1,3,5
+
+-- !sql --
+\N
+
+-- !sql --
+1
+
+-- !sql --
+
+
+-- !sql --
+\N
+
+-- !sql --
+9999999999
+
+-- !sql --
+4 1,2,3
+3 1,2,3,4,5
+
+-- !sql --
+3
+
+-- !sql --
+\N
+
+-- !sql --
+0
+
+-- !sql --
+0
+
diff --git a/regression-test/suites/datatype_p0/bitmap/test_bitmap_int.groovy
b/regression-test/suites/datatype_p0/bitmap/test_bitmap_int.groovy
new file mode 100644
index 0000000000..1cdbd80c79
--- /dev/null
+++ b/regression-test/suites/datatype_p0/bitmap/test_bitmap_int.groovy
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_bitmap_int") { // regression suite: fills a bitmap_union column from integer arguments via bitmap_hash, then via bitmap_hash64
+ sql "DROP TABLE IF EXISTS test_int_bitmap" // start from a clean state
+ sql """
+ CREATE TABLE test_int_bitmap (`id` int, `bitmap_set` bitmap
bitmap_union)
+ ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 5
properties("replication_num" = "1");
+ """
+ sql "insert into test_int_bitmap values(1, bitmap_hash(1)), (2,
bitmap_hash(2)), (3, bitmap_hash(3))"
+
+ qt_sql1 "select bitmap_union_count(bitmap_set) from test_int_bitmap"
+ qt_sql2 "select id,bitmap_union_count(bitmap_set) from test_int_bitmap
group by id order by id"
+ order_qt_sql3 "select * from test_int_bitmap"
+ qt_desc "desc test_int_bitmap"
+
+ sql "DROP TABLE test_int_bitmap" // table is recreated below for the 64-bit variant
+
+ // bitmap_hash64: same table definition; ids 1 and 2 are inserted twice with different arguments
+ sql """
+ CREATE TABLE test_int_bitmap (`id` int, `bitmap_set` bitmap
bitmap_union)
+ ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 5
properties("replication_num" = "1");
+ """
+ sql "insert into test_int_bitmap values(1, bitmap_hash64(1)), (2,
bitmap_hash64(2)), (3, bitmap_hash64(3))"
+ sql "insert into test_int_bitmap values(1, bitmap_hash64(11)), (2,
bitmap_hash64(22))"
+
+ qt_sql64_1 "select bitmap_union_count(bitmap_set) from test_int_bitmap"
+ qt_sql64_2 "select id,bitmap_union_count(bitmap_set) from test_int_bitmap
group by id order by id"
+ order_qt_sql64_3 "select * from test_int_bitmap"
+
+ sql "DROP TABLE test_int_bitmap" // clean up
+
+}
+
+
diff --git
a/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy
b/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy
new file mode 100644
index 0000000000..7a8282bf9c
--- /dev/null
+++
b/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy
@@ -0,0 +1,187 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_bitmap_function") { // Query-level checks for the bitmap SQL functions, grouped by function name below.
+
+ sql """ SET enable_vectorized_engine = TRUE; """ // turn the vectorized engine on before running the queries
+
+ // BITMAP_AND: two-argument and variadic calls, including bitmap_empty() and NULL arguments
+ qt_sql """ select bitmap_count(bitmap_and(to_bitmap(1), to_bitmap(2))) cnt
"""
+ qt_sql """ select bitmap_count(bitmap_and(to_bitmap(1), to_bitmap(1))) cnt
"""
+ qt_sql """ select bitmap_to_string(bitmap_and(to_bitmap(1), to_bitmap(1)))
"""
+ qt_sql """ select bitmap_to_string(bitmap_and(bitmap_from_string('1,2,3'),
bitmap_from_string('1,2'), bitmap_from_string('1,2,3,4,5'))) """
+ qt_sql """ select bitmap_to_string(bitmap_and(bitmap_from_string('1,2,3'),
bitmap_from_string('1,2'), bitmap_from_string('1,2,3,4,5'),bitmap_empty())) """
+ qt_sql """ select bitmap_to_string(bitmap_and(bitmap_from_string('1,2,3'),
bitmap_from_string('1,2'), bitmap_from_string('1,2,3,4,5'),NULL)) """
+
+ // BITMAP_CONTAINS: one value that is absent (2) and one that is present (1)
+ qt_sql """ select bitmap_contains(to_bitmap(1),2) cnt """
+ qt_sql """ select bitmap_contains(to_bitmap(1),1) cnt """
+
+ // BITMAP_EMPTY
+ qt_sql """ select bitmap_count(bitmap_empty()) """
+
+ // BITMAP_FROM_STRING: starts with a bitmap_empty() query; the last input list contains a negative element (-1)
+ qt_sql """ select bitmap_to_string(bitmap_empty()) """
+ qt_sql """ select bitmap_to_string(bitmap_from_string("0, 1, 2")) """
+ qt_sql """ select bitmap_from_string("-1, 0, 1, 2") """
+
+ // BITMAP_HAS_ANY
+ qt_sql """ select bitmap_has_any(to_bitmap(1),to_bitmap(2)) cnt """
+ qt_sql """ select bitmap_has_any(to_bitmap(1),to_bitmap(1)) cnt """
+
+ // BITMAP_HAS_ALL
+ qt_sql """ select bitmap_has_all(bitmap_from_string("0, 1, 2"),
bitmap_from_string("1, 2")) cnt """
+ qt_sql """ select bitmap_has_all(bitmap_empty(), bitmap_from_string("1,
2")) cnt """
+
+ // BITMAP_HASH: non-empty string, empty string and NULL input
+ qt_sql_bitmap_hash1 """ select bitmap_count(bitmap_hash('hello')) """
+ qt_sql_bitmap_hash2 """ select bitmap_count(bitmap_hash('')) """
+ qt_sql_bitmap_hash3 """ select bitmap_count(bitmap_hash(null)) """
+
+ // BITMAP_HASH64: the same three inputs as the BITMAP_HASH queries
+ qt_sql_bitmap_hash64_1 """ select bitmap_count(bitmap_hash64('hello')) """
+ qt_sql_bitmap_hash64_2 """ select bitmap_count(bitmap_hash64('')) """
+ qt_sql_bitmap_hash64_3 """ select bitmap_count(bitmap_hash64(null)) """
+
+ // BITMAP_OR: two-argument and variadic calls, including NULL and bitmap_empty() arguments
+ qt_sql """ select bitmap_count(bitmap_or(to_bitmap(1), to_bitmap(2))) cnt
"""
+ qt_sql """ select bitmap_count(bitmap_or(to_bitmap(1), to_bitmap(1))) cnt
"""
+ qt_sql """ select bitmap_to_string(bitmap_or(to_bitmap(1), to_bitmap(2)))
"""
+ qt_sql """ select bitmap_to_string(bitmap_or(to_bitmap(1), to_bitmap(2),
to_bitmap(10), to_bitmap(0), NULL)) """
+ qt_sql """ select bitmap_to_string(bitmap_or(to_bitmap(1), to_bitmap(2),
to_bitmap(10), to_bitmap(0), bitmap_empty())) """
+ qt_sql """ select bitmap_to_string(bitmap_or(to_bitmap(10),
bitmap_from_string('1,2'), bitmap_from_string('1,2,3,4,5'))) """
+
+ // BITMAP_AND_COUNT
+ qt_sql """ select
bitmap_and_count(bitmap_from_string('1,2,3'),bitmap_empty()) """
+ qt_sql """ select
bitmap_and_count(bitmap_from_string('1,2,3'),bitmap_from_string('1,2,3')) """
+ qt_sql """ select
bitmap_and_count(bitmap_from_string('1,2,3'),bitmap_from_string('3,4,5')) """
+ qt_sql """ select bitmap_and_count(bitmap_from_string('1,2,3'),
bitmap_from_string('1,2'), bitmap_from_string('1,2,3,4,5')) """
+ qt_sql """ select bitmap_and_count(bitmap_from_string('1,2,3'),
bitmap_from_string('1,2'), bitmap_from_string('1,2,3,4,5'),bitmap_empty()) """
+ qt_sql """ select bitmap_and_count(bitmap_from_string('1,2,3'),
bitmap_from_string('1,2'), bitmap_from_string('1,2,3,4,5'), NULL) """
+
+ // BITMAP_OR_COUNT
+ qt_sql """ select
bitmap_or_count(bitmap_from_string('1,2,3'),bitmap_empty()) """
+ qt_sql """ select
bitmap_or_count(bitmap_from_string('1,2,3'),bitmap_from_string('1,2,3'))"""
+ qt_sql """ select
bitmap_or_count(bitmap_from_string('1,2,3'),bitmap_from_string('3,4,5')) """
+ qt_sql """ select bitmap_or_count(bitmap_from_string('1,2,3'),
bitmap_from_string('3,4,5'), to_bitmap(100), bitmap_empty()) """
+ qt_sql """ select bitmap_or_count(bitmap_from_string('1,2,3'),
bitmap_from_string('3,4,5'), to_bitmap(100), NULL) """
+
+ // BITMAP_XOR: two-argument and variadic calls, including bitmap_empty() and NULL arguments
+ qt_sql """ select
bitmap_count(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4')))
cnt """
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4')))
"""
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5')))
"""
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5'),bitmap_empty()))
"""
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5'),NULL))
"""
+
+ // BITMAP_XOR_COUNT
+ qt_sql """ select
bitmap_xor_count(bitmap_from_string('1,2,3'),bitmap_from_string('3,4,5')) """
+ qt_sql """ select
bitmap_xor_count(bitmap_from_string('1,2,3'),bitmap_from_string('1,2,3')) """
+ qt_sql """ select
bitmap_xor_count(bitmap_from_string('1,2,3'),bitmap_from_string('4,5,6')) """
+ qt_sql """ select
(bitmap_xor_count(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5')))
"""
+ qt_sql """ select
(bitmap_xor_count(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5'),bitmap_empty()))
"""
+ qt_sql """ select
(bitmap_xor_count(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5'),NULL))
"""
+
+ // BITMAP_NOT
+ qt_sql """ select
bitmap_count(bitmap_not(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4')))
cnt """
+ qt_sql """ select
bitmap_to_string(bitmap_not(bitmap_from_string('2,3,5'),bitmap_from_string('1,2,3,4')))
"""
+
+ // BITMAP_AND_NOT
+ qt_sql """ select
bitmap_count(bitmap_and_not(bitmap_from_string('1,2,3'),bitmap_from_string('3,4,5')))
cnt """
+
+ // BITMAP_AND_NOT_COUNT
+ qt_sql """ select
bitmap_and_not_count(bitmap_from_string('1,2,3'),bitmap_from_string('3,4,5'))
cnt """
+
+ // BITMAP_SUBSET_IN_RANGE
+ qt_sql """ select
bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 0, 9))
value """
+ qt_sql """ select
bitmap_to_string(bitmap_subset_in_range(bitmap_from_string('1,2,3,4,5'), 2, 3))
value """
+
+ // BITMAP_SUBSET_LIMIT
+ qt_sql """ select
bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 0, 3))
value """
+ qt_sql """ select
bitmap_to_string(bitmap_subset_limit(bitmap_from_string('1,2,3,4,5'), 4, 3))
value """
+
+ // SUB_BITMAP: zero, negative (-3) and positive offsets; the last call asks for more elements (100) than the input list holds
+ qt_sql """ select
bitmap_to_string(sub_bitmap(bitmap_from_string('1,0,1,2,3,1,5'), 0, 3)) value
"""
+ qt_sql """ select
bitmap_to_string(sub_bitmap(bitmap_from_string('1,0,1,2,3,1,5'), -3, 2)) value
"""
+ qt_sql """ select
bitmap_to_string(sub_bitmap(bitmap_from_string('1,0,1,2,3,1,5'), 2, 100)) value
"""
+
+ // BITMAP_TO_STRING: NULL, empty, single-element and two-element inputs
+ qt_sql """ select bitmap_to_string(null) """
+ qt_sql """ select bitmap_to_string(bitmap_empty()) """
+ qt_sql """ select bitmap_to_string(to_bitmap(1)) """
+ qt_sql """ select bitmap_to_string(bitmap_or(to_bitmap(1), to_bitmap(2)))
"""
+
+ // BITMAP_UNION: aggregate table keyed by page_id; each insert below adds a single-element bitmap for a page
+ def bitmapUnionTable = "test_bitmap_union"
+ sql """ DROP TABLE IF EXISTS ${bitmapUnionTable} """
+ sql """ create table ${bitmapUnionTable} (page_id int,user_id bitmap
bitmap_union) aggregate key (page_id) distributed by hash (page_id)
PROPERTIES("replication_num" = "1") """
+
+ sql """ insert into ${bitmapUnionTable} values(1, to_bitmap(1)); """
+ sql """ insert into ${bitmapUnionTable} values(1, to_bitmap(2)); """
+ sql """ insert into ${bitmapUnionTable} values(1, to_bitmap(3)); """
+ sql """ insert into ${bitmapUnionTable} values(2, to_bitmap(1)); """
+ sql """ insert into ${bitmapUnionTable} values(2, to_bitmap(2)); """
+
+ qt_sql """ select page_id, bitmap_union(user_id) from ${bitmapUnionTable}
group by page_id order by page_id """
+ qt_sql """ select page_id, bitmap_count(bitmap_union(user_id)) from
${bitmapUnionTable} group by page_id order by page_id """
+ qt_sql """ select page_id, count(distinct user_id) from
${bitmapUnionTable} group by page_id order by page_id """
+
+ sql """ drop table ${bitmapUnionTable} """
+
+ // BITMAP_XOR (same queries as the earlier BITMAP_XOR section, here with a trailing ';' inside the SQL text)
+ qt_sql """ select
bitmap_count(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4')))
cnt; """
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4')));
"""
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5')));
"""
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5'),bitmap_empty()));
"""
+ qt_sql """ select
bitmap_to_string(bitmap_xor(bitmap_from_string('2,3'),bitmap_from_string('1,2,3,4'),bitmap_from_string('3,4,5'),NULL));
"""
+
+ // TO_BITMAP: includes a negative input (-1)
+ qt_sql """ select bitmap_count(to_bitmap(10)) """
+ qt_sql """ select bitmap_to_string(to_bitmap(-1)) """
+
+ // BITMAP_MAX: empty input string, then a list containing 9999999999
+ qt_sql """ select bitmap_max(bitmap_from_string('')) value; """
+ qt_sql """ select bitmap_max(bitmap_from_string('1,9999999999')) value """
+
+ // INTERSECT_COUNT. NOTE(review): unlike the BITMAP_UNION table above, this table is not dropped after its queries
+ def intersectCountTable = "test_intersect_count"
+ sql """ DROP TABLE IF EXISTS ${intersectCountTable} """
+ sql """ create table ${intersectCountTable} (dt int (11),page varchar
(10),user_id bitmap BITMAP_UNION ) DISTRIBUTED BY HASH(dt) BUCKETS 2
PROPERTIES("replication_num" = "1") """
+
+
+ sql """ insert into ${intersectCountTable} values(3,"110001",
to_bitmap(1)); """
+ sql """ insert into ${intersectCountTable} values(3,"110001",
to_bitmap(2)); """
+ sql """ insert into ${intersectCountTable} values(3,"110001",
to_bitmap(3)); """
+ sql """ insert into ${intersectCountTable} values(3,"110001",
to_bitmap(4)); """
+ sql """ insert into ${intersectCountTable} values(3,"110001",
to_bitmap(5)); """
+ sql """ insert into ${intersectCountTable} values(4,"110001",
to_bitmap(1)); """
+ sql """ insert into ${intersectCountTable} values(4,"110001",
to_bitmap(2)); """
+ sql """ insert into ${intersectCountTable} values(4,"110001",
to_bitmap(3)); """
+
+ qt_sql """ select dt,bitmap_to_string(user_id) from ${intersectCountTable}
where dt in (3,4) order by dt desc; """
+ qt_sql """ select intersect_count(user_id,dt,3,4) from
${intersectCountTable}; """
+
+ // ORTHOGONAL_BITMAP_**** (the functions queried below are spelled orthogonal_bitmap_*; the "arthogonal" variable and table names are kept as they are)
+ def arthogonalBitmapTable = "test_arthogonal_bitmap"
+ sql """ DROP TABLE IF EXISTS ${arthogonalBitmapTable} """
+ sql """ CREATE TABLE ${arthogonalBitmapTable} ( tag_group bigint(20) NULL
COMMENT "标签组", tag_value_id varchar(64) NULL COMMENT "标签值", tag_range int(11)
NOT NULL DEFAULT "0" COMMENT "", partition_sign varchar(32) NOT NULL COMMENT
"分区标识", bucket int(11) NOT NULL COMMENT "分桶字段", confidence tinyint(4) NULL
DEFAULT "100" COMMENT "置信度", members bitmap BITMAP_UNION NULL COMMENT "人群")
ENGINE=OLAP AGGREGATE KEY(tag_group, tag_value_id, tag_range, partition_sign,
bucket, confidence) COMMENT "d [...]
+
+ qt_sql """ select orthogonal_bitmap_intersect(members, tag_group, 1150000,
1150001, 390006) from ${arthogonalBitmapTable} where tag_group in ( 1150000,
1150001, 390006); """
+ qt_sql """ select orthogonal_bitmap_intersect_count(members, tag_group,
1150000, 1150001, 390006) from ${arthogonalBitmapTable} where tag_group in (
1150000, 1150001, 390006); """
+ qt_sql """ select orthogonal_bitmap_union_count(members) from
${arthogonalBitmapTable} where tag_group in ( 1150000, 1150001, 390006); """
+
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]