This is an automated email from the ASF dual-hosted git repository.
kangpinghuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0d66e6b Support bitmap_intersect (#3571)
0d66e6b is described below
commit 0d66e6bd1578eba0e9a58cf591f05a83f9e2b334
Author: EmmyMiao87 <[email protected]>
AuthorDate: Wed May 20 21:12:02 2020 +0800
Support bitmap_intersect (#3571)
* Support bitmap_intersect
Support the aggregate function bitmap_intersect, which is mainly used to take
the intersection of grouped data.
The function 'bitmap_intersect(expr)' calculates the intersection of bitmap
columns and returns a bitmap object.
The definition is as follows:
FunctionName: bitmap_intersect,
InputType: bitmap,
OutputType: bitmap
The scenario is as follows:
Query which users satisfy the three tags a, b, and c at the same time.
```
select bitmap_to_string(bitmap_intersect(user_id)) from
(
select bitmap_union(user_id) user_id from bitmap_intersect_test
where tag in ('a', 'b', 'c')
group by tag
) a
```
Closed #3552.
* Add docs of bitmap_union and bitmap_intersect
* Support null of bitmap_intersect
---
be/src/exprs/bitmap_function.cpp | 31 +++++++++++
be/src/exprs/bitmap_function.h | 6 ++-
be/test/exprs/bitmap_function_test.cpp | 34 ++++++++++++
docs/.vuepress/sidebar/en.js | 2 +
docs/.vuepress/sidebar/zh-CN.js | 2 +
.../bitmap-functions/bitmap_intersect.md | 61 +++++++++++++++++++++
.../sql-functions/bitmap-functions/bitmap_union.md | 58 ++++++++++++++++++++
.../bitmap-functions/bitmap_intersect.md | 62 ++++++++++++++++++++++
.../sql-functions/bitmap-functions/bitmap_union.md | 58 ++++++++++++++++++++
.../apache/doris/analysis/FunctionCallExpr.java | 3 +-
.../java/org/apache/doris/catalog/FunctionSet.java | 11 ++++
11 files changed, 326 insertions(+), 2 deletions(-)
diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 09fdd14..0d9bf25 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -302,6 +302,31 @@ void BitmapFunctions::bitmap_union(FunctionContext* ctx,
const StringVal& src, S
}
}
+// the dst value could be null
+void BitmapFunctions::nullable_bitmap_init(FunctionContext* ctx, StringVal*
dst) {
+ dst->is_null = true;
+}
+
+void BitmapFunctions::bitmap_intersect(FunctionContext* ctx, const StringVal&
src, StringVal* dst) {
+ if (src.is_null) {
+ return;
+ }
+ // if dst is null, the src input is the first value
+ if (dst->is_null) {
+ dst->is_null = false;
+ dst->len = sizeof(BitmapValue);
+ dst->ptr = (uint8_t*)new BitmapValue((char*) src.ptr);
+ return;
+ }
+ auto dst_bitmap = reinterpret_cast<BitmapValue*>(dst->ptr);
+ // zero size means the src input is a agg object
+ if (src.len == 0) {
+ (*dst_bitmap) &= *reinterpret_cast<BitmapValue*>(src.ptr);
+ } else {
+ (*dst_bitmap) &= BitmapValue((char*) src.ptr);
+ }
+}
+
BigIntVal BitmapFunctions::bitmap_count(FunctionContext* ctx, const StringVal&
src) {
if (src.is_null) {
return 0;
@@ -343,12 +368,17 @@ StringVal
BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx, const do
}
StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const
StringVal& src) {
+ if (src.is_null) {
+ return src;
+ }
+
auto src_bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
StringVal result = serialize(ctx, src_bitmap);
delete src_bitmap;
return result;
}
+// This is a init function for intersect_count not for bitmap_intersect.
template<typename T, typename ValType>
void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal*
dst) {
dst->is_null = false;
@@ -510,6 +540,7 @@ template void BitmapFunctions::bitmap_update_int<IntVal>(
template void BitmapFunctions::bitmap_update_int<BigIntVal>(
FunctionContext* ctx, const BigIntVal& src, StringVal* dst);
+// this is init function for intersect_count not for bitmap_intersect
template void BitmapFunctions::bitmap_intersect_init<int8_t, TinyIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::bitmap_intersect_init<int16_t, SmallIntVal>(
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index b69fc49..5d86228 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -51,6 +51,9 @@ public:
static BigIntVal bitmap_get_value(FunctionContext* ctx, const StringVal&
src);
static void bitmap_union(FunctionContext* ctx, const StringVal& src,
StringVal* dst);
+ // the dst value could be null
+ static void nullable_bitmap_init(FunctionContext* ctx, StringVal* dst);
+ static void bitmap_intersect(FunctionContext* ctx, const StringVal& src,
StringVal* dst);
static BigIntVal bitmap_count(FunctionContext* ctx, const StringVal& src);
static StringVal bitmap_serialize(FunctionContext* ctx, const StringVal&
src);
@@ -68,8 +71,9 @@ public:
static BooleanVal bitmap_contains(FunctionContext* ctx, const StringVal&
src, const BigIntVal& input);
static BooleanVal bitmap_has_any(FunctionContext* ctx, const StringVal&
lhs, const StringVal& rhs);
- // bitmap_intersect
+ // intersect count
template<typename T, typename ValType>
+ // this is init function for intersect_count not for bitmap_intersect
static void bitmap_intersect_init(FunctionContext* ctx, StringVal* dst);
template<typename T, typename ValType>
static void bitmap_intersect_update(FunctionContext* ctx, const StringVal&
src, const ValType& key,
diff --git a/be/test/exprs/bitmap_function_test.cpp
b/be/test/exprs/bitmap_function_test.cpp
index 16239d9..17b377f 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -172,6 +172,39 @@ TEST_F(BitmapFunctionsTest, bitmap_union) {
ASSERT_EQ(expected, result);
}
+// test bitmap_intersect
+TEST_F(BitmapFunctionsTest, bitmap_intersect) {
+ StringVal dst;
+ BitmapFunctions::bitmap_intersect_init_real(ctx, &dst);
+
+ BitmapValue bitmap1(1);
+ bitmap1.add(2);
+ bitmap1.add(3);
+ StringVal src1 = convert_bitmap_to_string(ctx, bitmap1);
+ BitmapFunctions::bitmap_intersect(ctx, src1, &dst);
+
+ BitmapValue bitmap2(1);
+ bitmap2.add(2);
+ StringVal src2 = convert_bitmap_to_string(ctx, bitmap2);
+ BitmapFunctions::bitmap_intersect(ctx, src2, &dst);
+
+ StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst);
+ BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized);
+ BigIntVal expected(2);
+ ASSERT_EQ(expected, result);
+}
+
+// test bitmap_intersect with null dst
+TEST_F(BitmapFunctionsTest, bitmap_intersect_empty) {
+ StringVal dst;
+ BitmapFunctions::bitmap_intersect_init_real(ctx, &dst);
+
+ StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst);
+ BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized);
+ BigIntVal expected(0);
+ ASSERT_EQ(expected, result);
+}
+
TEST_F(BitmapFunctionsTest, bitmap_count) {
BitmapValue bitmap(1024);
bitmap.add(1);
@@ -186,6 +219,7 @@ TEST_F(BitmapFunctionsTest, bitmap_count) {
ASSERT_EQ(BigIntVal(0), null_bitmap);
}
+// test intersect_count
template<typename ValType, typename ValueType>
void test_bitmap_intersect(FunctionContext* ctx, ValType key1, ValType key2) {
StringVal bitmap_column("placeholder");
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index e7ade2c..f293bb8 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -259,6 +259,8 @@ module.exports = [
"bitmap_or",
"bitmap_to_string",
"to_bitmap",
+ "bitmap_intersect",
+ "bitmap_union",
],
},
{
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index 880c340..9c4fde2 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -271,6 +271,8 @@ module.exports = [
"bitmap_or",
"bitmap_to_string",
"to_bitmap",
+ "bitmap_intersect",
+ "bitmap_union",
],
},
{
diff --git
a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
new file mode 100644
index 0000000..374441a
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
@@ -0,0 +1,61 @@
+---
+{
+ "title": "bitmap_intersect",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_intersect
+## description
+
+Aggregate function, used to calculate the bitmap intersection after
grouping. A common usage scenario is calculating the user retention rate.
+
+### Syntax
+
+`BITMAP BITMAP_INTERSECT(BITMAP value)`
+
+Takes a set of bitmap values, computes the intersection of that set, and
returns the result as a bitmap.
+
+## example
+
+Table schema
+
+```
+KeysType: AGG_KEY
+Columns: tag varchar, date datetime, user_id bitmap bitmap_union
+```
+
+```
+Find the retention of users between 2020-05-18 and 2020-05-19 under different
tags.
+mysql> select tag, bitmap_intersect(user_id) from (select tag, date,
bitmap_union(user_id) user_id from table where date in ('2020-05-18',
'2020-05-19') group by tag, date) a group by tag;
+```
+
+Used in combination with the bitmap_to_string function to obtain the specific
data of the intersection
+
+```
+Who are the users retained under different tags between 2020-05-18 and
2020-05-19?
+mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select
tag, date, bitmap_union(user_id) user_id from table where date in
('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
+```
+
+## keyword
+
+ BITMAP_INTERSECT, BITMAP
diff --git
a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
new file mode 100644
index 0000000..4e92f97
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
@@ -0,0 +1,58 @@
+---
+{
+ "title": "bitmap_union",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_union
+## description
+
+Aggregate function, used to calculate the grouped bitmap union. Common usage
scenarios include calculating PV and UV.
+
+### Syntax
+
+`BITMAP BITMAP_UNION(BITMAP value)`
+
+Takes a set of bitmap values, computes the union of that set, and returns the
result as a bitmap.
+
+## example
+
+```
+mysql> select page_id, bitmap_union(user_id) from table group by page_id;
+```
+
+Combined with the bitmap_count function, the UV data of the web page can be
obtained
+
+```
+mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by
page_id;
+```
+
+When the user_id field is int, the semantics of the above query are equivalent to
+
+```
+mysql> select page_id, count(distinct user_id) from table group by page_id;
+```
+
+## keyword
+
+ BITMAP_UNION, BITMAP
diff --git
a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
new file mode 100644
index 0000000..3b71de4
--- /dev/null
+++
b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
@@ -0,0 +1,62 @@
+---
+{
+ "title": "bitmap_intersect",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_intersect
+## description
+
+聚合函数,用于计算分组后的 bitmap 交集。常见使用场景如:计算用户留存率。
+
+### Syntax
+
+`BITMAP BITMAP_INTERSECT(BITMAP value)`
+
+输入一组 bitmap 值,求这一组 bitmap 值的交集,并返回。
+
+## example
+
+表结构
+
+```
+KeysType: AGG_KEY
+Columns: tag varchar, date datetime, user_id bitmap bitmap_union
+
+```
+
+```
+求今天和昨天不同 tag 下的用户留存
+mysql> select tag, bitmap_intersect(user_id) from (select tag, date,
bitmap_union(user_id) user_id from table where date in ('2020-05-18',
'2020-05-19') group by tag, date) a group by tag;
+```
+
+和 bitmap_to_string 函数组合使用可以获取交集的具体数据
+
+```
+求今天和昨天不同 tag 下留存的用户都是哪些
+mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select
tag, date, bitmap_union(user_id) user_id from table where date in
('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
+```
+
+## keyword
+
+ BITMAP_INTERSECT, BITMAP
diff --git
a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
new file mode 100644
index 0000000..295e118
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
@@ -0,0 +1,58 @@
+---
+{
+ "title": "bitmap_union",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_union
+## description
+
+聚合函数,用于计算分组后的 bitmap 并集。常见使用场景如:计算PV,UV。
+
+### Syntax
+
+`BITMAP BITMAP_UNION(BITMAP value)`
+
+输入一组 bitmap 值,求这一组 bitmap 值的并集,并返回。
+
+## example
+
+```
+mysql> select page_id, bitmap_union(user_id) from table group by page_id;
+```
+
+和 bitmap_count 函数组合使用可以求得网页的 UV 数据
+
+```
+mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by
page_id;
+```
+
+当 user_id 字段为 int 时,上面查询语义等同于
+
+```
+mysql> select page_id, count(distinct user_id) from table group by page_id;
+```
+
+## keyword
+
+ BITMAP_UNION, BITMAP
diff --git a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index d185e40..5709e04 100644
--- a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -407,7 +407,8 @@ public class FunctionCallExpr extends Expr {
if (fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_COUNT)
||
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION)
- ||
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT)) {
+ ||
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT)
+ ||
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_INTERSECT)) {
if (children.size() != 1) {
throw new AnalysisException(fnName + " function could only
have one child");
}
diff --git a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 26b744c..256a461 100644
--- a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -540,6 +540,7 @@ public class FunctionSet {
public static final String BITMAP_UNION_INT = "bitmap_union_int";
public static final String BITMAP_COUNT = "bitmap_count";
public static final String INTERSECT_COUNT = "intersect_count";
+ public static final String BITMAP_INTERSECT = "bitmap_intersect";
private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL =
ImmutableMap.<Type, String>builder()
@@ -1144,6 +1145,7 @@ public class FunctionSet {
null, false, true, false));
}
+ // bitmap
addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION,
Lists.newArrayList(Type.BITMAP),
Type.BITMAP,
Type.VARCHAR,
@@ -1165,6 +1167,15 @@ public class FunctionSet {
null,
"_ZN5doris15BitmapFunctions15bitmap_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
true, true, true));
+ // TODO(ml): supply function symbol
+ addBuiltin(AggregateFunction.createBuiltin(BITMAP_INTERSECT,
Lists.newArrayList(Type.BITMAP),
+ Type.BITMAP, Type.VARCHAR,
+
"_ZN5doris15BitmapFunctions20nullable_bitmap_initEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+
"_ZN5doris15BitmapFunctions16bitmap_intersectEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
+
"_ZN5doris15BitmapFunctions16bitmap_intersectEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
+
"_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+
"_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+ true, false, true));
//PercentileApprox
addBuiltin(AggregateFunction.createBuiltin("percentile_approx",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]