This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
new ff48ae51e1 [Bug](bitmap) intersect_count function use in string cause
ASAN error (#11936) (#12980)
ff48ae51e1 is described below
commit ff48ae51e17e16cd0b79b2b5922e40a15c31e259
Author: HappenLee <[email protected]>
AuthorDate: Tue Sep 27 08:41:54 2022 +0800
[Bug](bitmap) intersect_count function use in string cause ASAN error
(#11936) (#12980)
---
be/src/util/bitmap_intersect.h | 111 +++++++++++++++++++++
.../aggregate_function_orthogonal_bitmap.cpp | 3 +-
.../aggregate_function_orthogonal_bitmap.h | 13 ++-
3 files changed, 122 insertions(+), 5 deletions(-)
diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h
index dcda6ae5a5..94ff8dc283 100644
--- a/be/src/util/bitmap_intersect.h
+++ b/be/src/util/bitmap_intersect.h
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
+#include <parallel_hashmap/phmap.h>
+
#include "runtime/string_value.h"
#include "udf/udf.h"
#include "util/bitmap_value.h"
@@ -103,6 +105,15 @@ inline char* Helper::write_to<StringValue>(const StringValue& v, char* dest) {
dest += v.len;
return dest;
}
+
+template <>
+inline char* Helper::write_to<std::string>(const std::string& v, char* dest) {
+ *(uint32_t*)dest = v.size();
+ dest += 4;
+ memcpy(dest, v.c_str(), v.size());
+ dest += v.size();
+ return dest;
+}
// write_to end
template <>
@@ -119,6 +130,11 @@ template <>
inline int32_t Helper::serialize_size<StringValue>(const StringValue& v) {
return v.len + 4;
}
+
+template <>
+inline int32_t Helper::serialize_size<std::string>(const std::string& v) {
+ return v.size() + 4;
+}
// serialize_size end
template <>
@@ -147,6 +163,14 @@ inline void Helper::read_from<StringValue>(const char** src, StringValue* result
*result = StringValue((char*)*src, length);
*src += length;
}
+
+template <>
+inline void Helper::read_from<std::string>(const char** src, std::string* result) {
+ int32_t length = *(int32_t*)(*src);
+ *src += 4;
+ *result = std::string((char*)*src, length);
+ *src += length;
+}
// read_from end
} // namespace detail
@@ -242,4 +266,91 @@ private:
std::map<T, BitmapValue> _bitmaps;
};
+template <>
+struct BitmapIntersect<std::string_view> {
+public:
+ BitmapIntersect() = default;
+
+ explicit BitmapIntersect(const char* src) { deserialize(src); }
+
+ void add_key(const std::string_view key) {
+ BitmapValue empty_bitmap;
+ _bitmaps[key] = empty_bitmap;
+ }
+
+ void update(const std::string_view& key, const BitmapValue& bitmap) {
+ if (_bitmaps.find(key) != _bitmaps.end()) {
+ _bitmaps[key] |= bitmap;
+ }
+ }
+
+ void merge(const BitmapIntersect& other) {
+ for (auto& kv : other._bitmaps) {
+ if (_bitmaps.find(kv.first) != _bitmaps.end()) {
+ _bitmaps[kv.first] |= kv.second;
+ } else {
+ _bitmaps[kv.first] = kv.second;
+ }
+ }
+ }
+
+ // intersection
+ BitmapValue intersect() const {
+ BitmapValue result;
+ auto it = _bitmaps.begin();
+ result |= it->second;
+ it++;
+ for (; it != _bitmaps.end(); it++) {
+ result &= it->second;
+ }
+ return result;
+ }
+
+ // calculate the intersection for _bitmaps's bitmap values
+ int64_t intersect_count() const {
+ if (_bitmaps.empty()) {
+ return 0;
+ }
+ return intersect().cardinality();
+ }
+
+ // the serialize size
+ size_t size() {
+ size_t size = 4;
+ for (auto& kv : _bitmaps) {
+ size += detail::Helper::serialize_size(kv.first);
+ size += kv.second.getSizeInBytes();
+ }
+ return size;
+ }
+
+ //must call size() first
+ void serialize(char* dest) {
+ char* writer = dest;
+ *(int32_t*)writer = _bitmaps.size();
+ writer += 4;
+ for (auto& kv : _bitmaps) {
+ writer = detail::Helper::write_to(kv.first, writer);
+ kv.second.write(writer);
+ writer += kv.second.getSizeInBytes();
+ }
+ }
+
+ void deserialize(const char* src) {
+ const char* reader = src;
+ int32_t bitmaps_size = *(int32_t*)reader;
+ reader += 4;
+ for (int32_t i = 0; i < bitmaps_size; i++) {
+ std::string key;
+ detail::Helper::read_from(&reader, &key);
+ BitmapValue bitmap(reader);
+ reader += bitmap.getSizeInBytes();
+ _bitmaps[key] = bitmap;
+ }
+ }
+
+private:
+ phmap::flat_hash_map<std::string, BitmapValue> _bitmaps;
+};
+
} // namespace doris
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
index 470a6c8388..b95608ebbc 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
@@ -47,7 +47,8 @@ AggregateFunctionPtr create_aggregate_function_orthogonal(const std::string& nam
if (res) {
return res;
} else if (which.is_string_or_fixed_string()) {
- return std::make_shared<AggFunctionOrthBitmapFunc<Impl<StringValue>>>(argument_types);
+ return std::make_shared<AggFunctionOrthBitmapFunc<Impl<std::string_view>>>(
+ argument_types);
}
LOG(WARNING) << "Incorrect Type " << argument_type.get_name()
<< " of arguments for aggregate function " << name;
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
index 4f1fb69ec4..0c8b8b9b20 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
@@ -45,8 +45,11 @@ public:
if constexpr (IsNumber<T>) {
bitmap.update(data_col.get_element(row_num), bitmap_value);
- } else {
- bitmap.update(StringValue(data_col.get_data_at(row_num)), bitmap_value);
+ }
+ if constexpr (std::is_same_v<T, std::string_view>) {
+ // TODO: rethink here we really need to do a virtual function call
+ auto sr = data_col.get_data_at(row_num);
+ bitmap.update(std::string_view {sr.data, sr.size}, bitmap_value);
}
}
@@ -57,8 +60,10 @@ public:
const auto& col = static_cast<const ColVecData&>(*columns[idx]);
if constexpr (IsNumber<T>) {
bitmap.add_key(col.get_element(row_num));
- } else {
- bitmap.add_key(StringValue(col.get_data_at(row_num)));
+ }
+ if constexpr (std::is_same_v<T, std::string_view>) {
+ auto sr = col.get_data_at(row_num);
+ bitmap.add_key(std::string_view {sr.data, sr.size});
}
}
first_init = false;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]