This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 93e5d8e660 [Vectorized](function) support bitmap_from_array function
(#14259)
93e5d8e660 is described below
commit 93e5d8e66090903677f6d0927ee72a67cd45ee28
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Nov 15 01:55:51 2022 +0800
[Vectorized](function) support bitmap_from_array function (#14259)
---
be/src/vec/functions/function_bitmap.cpp | 82 ++++++++++++++++++++--
.../bitmap-functions/bitmap_from_array.md | 53 ++++++++++++++
docs/sidebars.json | 1 +
.../bitmap-functions/bitmap_from_array.md | 53 ++++++++++++++
gensrc/script/doris_builtins_functions.py | 4 ++
.../array_functions/test_array_functions.out | 9 +++
.../array_functions/test_array_functions.groovy | 2 +
7 files changed, 198 insertions(+), 6 deletions(-)
diff --git a/be/src/vec/functions/function_bitmap.cpp
b/be/src/vec/functions/function_bitmap.cpp
index 5f4483253b..247dcab19b 100644
--- a/be/src/vec/functions/function_bitmap.cpp
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -21,9 +21,12 @@
#include "gutil/strings/numbers.h"
#include "gutil/strings/split.h"
#include "util/string_parser.hpp"
+#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
#include "vec/columns/columns_number.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
#include "vec/functions/function_always_not_nullable.h"
#include "vec/functions/function_bitmap_min_or_max.h"
#include "vec/functions/function_const.h"
@@ -175,6 +178,8 @@ struct ToBitmapWithCheck {
};
struct BitmapFromString {
+ using ArgumentType = DataTypeString;
+
static constexpr auto name = "bitmap_from_string";
static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
@@ -199,6 +204,42 @@ struct BitmapFromString {
}
};
+// Implementation of bitmap_from_array: builds one BitmapValue per array row.
+// A row whose array contains a NULL or a negative element is marked NULL.
+struct BitmapFromArray {
+    using ArgumentType = DataTypeArray;
+    static constexpr auto name = "bitmap_from_array";
+
+    // ColumnType is the concrete column type of the array's element column.
+    //
+    // offset_column_data: end offset (exclusive) of each row in the element column.
+    // nested_column:      element column; cast to ColumnType without a runtime check.
+    // nested_null_map:    a non-zero entry at j marks element j as NULL.
+    // res:                exactly one BitmapValue is appended per row; invalid rows
+    //                     get a default-constructed value.
+    // null_map:           null_map[i] is set to 1 when row i is invalid; entries of
+    //                     valid rows are left untouched.
+    // Always returns Status::OK().
+    template <typename ColumnType>
+    static Status vector(const ColumnArray::Offsets64& offset_column_data,
+                         const IColumn& nested_column, const NullMap& nested_null_map,
+                         std::vector<BitmapValue>& res, NullMap& null_map) {
+        const auto& nested_column_data =
+                static_cast<const ColumnType&>(nested_column).get_data();
+        const auto size = offset_column_data.size();
+        res.reserve(size);
+        std::vector<uint64_t> bits;
+        // Start offset of the current row, carried across iterations so the first
+        // row does not have to read offset_column_data[-1].
+        size_t prev_offset = 0;
+        for (size_t i = 0; i < size; ++i) {
+            const auto curr_offset = offset_column_data[i];
+            // Reset for every row: values collected before an invalid element must
+            // not leak into the bitmap of the following row.
+            bits.clear();
+            bool valid = true;
+            for (auto j = prev_offset; j < curr_offset; ++j) {
+                const auto data = nested_column_data[j];
+                // invalid value: negative or NULL element
+                if (UNLIKELY(data < 0) || UNLIKELY(nested_null_map[j])) {
+                    valid = false;
+                    break;
+                }
+                bits.push_back(static_cast<uint64_t>(data));
+            }
+            if (valid) {
+                res.emplace_back(bits);
+            } else {
+                // Keep res aligned with the row index even for NULL results.
+                res.emplace_back();
+                null_map[i] = 1;
+            }
+            prev_offset = curr_offset;
+        }
+        return Status::OK();
+    }
+};
+
template <typename Impl>
class FunctionBitmapAlwaysNull : public IFunction {
public:
@@ -227,12 +268,39 @@ public:
ColumnPtr argument_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- const ColumnString* str_column =
check_and_get_column<ColumnString>(argument_column.get());
- const ColumnString::Chars& data = str_column->get_chars();
- const ColumnString::Offsets& offsets = str_column->get_offsets();
-
- Impl::vector(data, offsets, res, null_map);
-
+ if constexpr (std::is_same_v<typename Impl::ArgumentType,
DataTypeString>) {
+ const auto& str_column = static_cast<const
ColumnString&>(*argument_column);
+ const ColumnString::Chars& data = str_column.get_chars();
+ const ColumnString::Offsets& offsets = str_column.get_offsets();
+ Impl::vector(data, offsets, res, null_map);
+ } else if constexpr (std::is_same_v<typename Impl::ArgumentType,
DataTypeArray>) {
+ auto argument_type = remove_nullable(
+ assert_cast<const
DataTypeArray&>(*block.get_by_position(arguments[0]).type)
+ .get_nested_type());
+ const auto& array_column = static_cast<const
ColumnArray&>(*argument_column);
+ const auto& offset_column_data = array_column.get_offsets();
+ const auto& nested_nullable_column =
+ static_cast<const
ColumnNullable&>(array_column.get_data());
+ const auto& nested_column =
nested_nullable_column.get_nested_column();
+ const auto& nested_null_map =
nested_nullable_column.get_null_map_column().get_data();
+ if (check_column<ColumnInt8>(nested_column)) {
+ Impl::template vector<ColumnInt8>(offset_column_data,
nested_column,
+ nested_null_map, res,
null_map);
+ } else if (check_column<ColumnInt16>(nested_column)) {
+ Impl::template vector<ColumnInt16>(offset_column_data,
nested_column,
+ nested_null_map, res,
null_map);
+ } else if (check_column<ColumnInt32>(nested_column)) {
+ Impl::template vector<ColumnInt32>(offset_column_data,
nested_column,
+ nested_null_map, res,
null_map);
+ } else if (check_column<ColumnInt64>(nested_column)) {
+ Impl::template vector<ColumnInt64>(offset_column_data,
nested_column,
+ nested_null_map, res,
null_map);
+ }
+ } else {
+ return Status::RuntimeError("Illegal column {} of argument of
function {}",
+
block.get_by_position(arguments[0]).column->get_name(),
+ get_name());
+ }
block.get_by_position(result).column =
ColumnNullable::create(std::move(res_data_column),
std::move(res_null_map));
return Status::OK();
@@ -695,6 +763,7 @@ using FunctionToBitmap =
FunctionAlwaysNotNullable<ToBitmap>;
using FunctionToBitmapWithCheck = FunctionAlwaysNotNullable<ToBitmapWithCheck,
true>;
using FunctionBitmapFromString = FunctionBitmapAlwaysNull<BitmapFromString>;
+using FunctionBitmapFromArray = FunctionBitmapAlwaysNull<BitmapFromArray>;
using FunctionBitmapHash = FunctionAlwaysNotNullable<BitmapHash<32>>;
using FunctionBitmapHash64 = FunctionAlwaysNotNullable<BitmapHash<64>>;
@@ -724,6 +793,7 @@ void register_function_bitmap(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionToBitmap>();
factory.register_function<FunctionToBitmapWithCheck>();
factory.register_function<FunctionBitmapFromString>();
+ factory.register_function<FunctionBitmapFromArray>();
factory.register_function<FunctionBitmapHash>();
factory.register_function<FunctionBitmapHash64>();
factory.register_function<FunctionBitmapCount>();
diff --git
a/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
new file mode 100644
index 0000000000..d72661911f
--- /dev/null
+++
b/docs/en/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
@@ -0,0 +1,53 @@
+---
+{
+ "title": "bitmap_from_array",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## bitmap_from_array
+
+### description
+#### Syntax
+
+`BITMAP BITMAP_FROM_ARRAY(ARRAY input)`
+
+Converts an array of TINYINT/SMALLINT/INT/BIGINT values to a BITMAP.
+If the array contains a NULL or a negative element, the result is NULL.
+
+### example
+
+```
+mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test;
++------+-----------------------+------------------------------------------------+
+| id | c_array |
bitmap_to_string(bitmap_from_array(`c_array`)) |
++------+-----------------------+------------------------------------------------+
+| 1 | [NULL] | NULL
|
+| 2 | [1, 2, 3, NULL] | NULL
|
+| 2 | [1, 2, 3, -10] | NULL
|
+| 3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7
|
+| 4 | [100, 200, 300, 300] | 100,200,300
|
++------+-----------------------+------------------------------------------------+
+5 rows in set (0.02 sec)
+```
+
+### keywords
+
+ BITMAP_FROM_ARRAY,BITMAP
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 66136ee3bd..ebb6bd885c 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -463,6 +463,7 @@
"sql-manual/sql-functions/bitmap-functions/bitmap_from_string",
"sql-manual/sql-functions/bitmap-functions/bitmap_to_string",
"sql-manual/sql-functions/bitmap-functions/bitmap_to_array",
+
"sql-manual/sql-functions/bitmap-functions/bitmap_from_array",
"sql-manual/sql-functions/bitmap-functions/bitmap_empty",
"sql-manual/sql-functions/bitmap-functions/bitmap_or",
"sql-manual/sql-functions/bitmap-functions/bitmap_and",
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
new file mode 100644
index 0000000000..b794d0e852
--- /dev/null
+++
b/docs/zh-CN/docs/sql-manual/sql-functions/bitmap-functions/bitmap_from_array.md
@@ -0,0 +1,53 @@
+---
+{
+ "title": "bitmap_from_array",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## bitmap_from_array
+
+### description
+#### Syntax
+
+`BITMAP BITMAP_FROM_ARRAY(ARRAY input)`
+
+将一个TINYINT/SMALLINT/INT/BIGINT类型的数组转化为一个BITMAP
+当输入字段不合法时,结果返回NULL
+
+### example
+
+```
+mysql> select *, bitmap_to_string(bitmap_from_array(c_array)) from array_test;
++------+-----------------------+------------------------------------------------+
+| id | c_array |
bitmap_to_string(bitmap_from_array(`c_array`)) |
++------+-----------------------+------------------------------------------------+
+| 1 | [NULL] | NULL
|
+| 2 | [1, 2, 3, NULL] | NULL
|
+| 2 | [1, 2, 3, -10] | NULL
|
+| 3 | [1, 2, 3, 4, 5, 6, 7] | 1,2,3,4,5,6,7
|
+| 4 | [100, 200, 300, 300] | 100,200,300
|
++------+-----------------------+------------------------------------------------+
+5 rows in set (0.02 sec)
+```
+
+### keywords
+
+ BITMAP_FROM_ARRAY,BITMAP
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 1c5694697d..a09ad77f31 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2610,6 +2610,10 @@ visible_functions = [
[['bitmap_from_string'], 'BITMAP', ['STRING'],
'_ZN5doris15BitmapFunctions18bitmap_from_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', 'ALWAYS_NULLABLE'],
+ [['bitmap_from_array'], 'BITMAP', ['ARRAY_TINYINT'], '', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['bitmap_from_array'], 'BITMAP', ['ARRAY_SMALLINT'], '', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['bitmap_from_array'], 'BITMAP', ['ARRAY_INT'], '', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['bitmap_from_array'], 'BITMAP', ['ARRAY_BIGINT'], '', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['bitmap_contains'], 'BOOLEAN', ['BITMAP','BIGINT'],
'_ZN5doris15BitmapFunctions15bitmap_containsEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValE',
'', '', 'vec', ''],
diff --git
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
index def00a0861..5259da6587 100644
---
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
+++
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
@@ -290,3 +290,12 @@
8 []
9 [9]
+-- !select --
+[1, 2, 3] 1,2,3
+[4] 4
+[]
+[1, 2, 3, 4, 5, 4, 3, 2, 1] 1,2,3,4,5
+[]
+[1, 2, 3, 4, 5, 4, 3, 2, 1] 1,2,3,4,5
+[8, 9, NULL, 10, NULL] \N
+
diff --git
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index 7f9ea92138..b3137feb1d 100644
---
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -105,4 +105,6 @@ suite("test_array_functions") {
qt_select "SELECT k1, array_range(k1) from ${tableName2} ORDER BY k1"
qt_select "SELECT k1, array_range(k1,k2) from ${tableName2} ORDER BY k1"
qt_select "SELECT k1, array_range(k1,k2,k3) from ${tableName2} ORDER BY k1"
+ qt_select "select k2, bitmap_to_string(bitmap_from_array(k2)) from
${tableName} order by k1;"
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]