This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch tpc_preview2 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 1239d8fe5202e86c31fb49bc68319ec71308bc46 Author: happenlee <[email protected]> AuthorDate: Wed Nov 26 10:45:13 2025 +0800 support runtime filter in --- be/src/exprs/hybrid_set.h | 37 +++++++++++++++++++++++++++++++++ be/src/vec/exprs/vdirect_in_predicate.h | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index e2c29eed82b..20b4ccd4263 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -257,6 +257,7 @@ public: doris::vectorized::ColumnUInt8::Container& results) = 0; virtual void to_pb(PInFilter* filter) = 0; + virtual uint64_t get_digest(uint64_t seed) = 0; class IteratorBase { public: @@ -412,6 +413,21 @@ public: void to_pb(PInFilter* filter) override { set_pb(filter, get_convertor<ElementType>()); } + uint64_t get_digest(uint64_t seed) override { + std::vector<ElementType> elems(_set.begin(), _set.end()); + std::sort(elems.begin(), elems.end()); + if constexpr (std::is_same<ElementType, bool>::value) { + for (const auto& v : elems) { + seed = HashUtil::crc_hash64(&v, sizeof(v), seed); + } + } else { + seed = HashUtil::crc_hash64(elems.data(), + (uint32_t)(elems.size() * sizeof(ElementType)), seed); + } + + return HashUtil::crc_hash64(&_contain_null, sizeof(_contain_null), seed); + } + private: ContainerType _set; ObjectPool _pool; @@ -589,6 +605,16 @@ public: void to_pb(PInFilter* filter) override { set_pb(filter, get_convertor<std::string>()); } + uint64_t get_digest(uint64_t seed) override { + std::vector<StringRef> elems(_set.begin(), _set.end()); + std::sort(elems.begin(), elems.end()); + + for (const auto& v : elems) { + seed = HashUtil::crc_hash64(v.data, (uint32_t)v.size, seed); + } + return HashUtil::crc_hash64(&_contain_null, sizeof(_contain_null), seed); + } + private: ContainerType _set; ObjectPool _pool; @@ -767,6 +793,17 @@ public: throw Exception(ErrorCode::INTERNAL_ERROR, "StringValueSet do not support to_pb"); } + uint64_t get_digest(uint64_t seed) override { + std::vector<StringRef> elems(_set.begin(), _set.end()); + std::sort(elems.begin(), elems.end()); + + for (const auto& v : elems) { + seed = HashUtil::crc_hash64(v.data, (uint32_t)v.size, seed); + } + + return HashUtil::crc_hash64(&_contain_null, sizeof(_contain_null), seed); + } + private: ContainerType _set; ObjectPool _pool; diff --git a/be/src/vec/exprs/vdirect_in_predicate.h b/be/src/vec/exprs/vdirect_in_predicate.h index bcc8714e0e6..bd360e565a2 100644 --- a/be/src/vec/exprs/vdirect_in_predicate.h +++ b/be/src/vec/exprs/vdirect_in_predicate.h @@ -106,7 +106,7 @@ public: return true; } - uint64_t get_digest(uint64_t seed) const override { return 0; } + uint64_t get_digest(uint64_t seed) const override { return _filter->get_digest(seed); } private: Status _do_execute(VExprContext* context, const Block* block, ColumnPtr& result_column, --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
