This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6b6682cd96 [Enhancement](Expr) Opt In Set by small size fixed 
container to improve performance. (#17976)
6b6682cd96 is described below

commit 6b6682cd961c0a6fbffb2ec6988c40ee042aa8a4
Author: Qi Chen <[email protected]>
AuthorDate: Tue Mar 28 23:10:39 2023 +0800

    [Enhancement](Expr) Opt In Set by small size fixed container to improve 
performance. (#17976)
---
 be/src/exprs/create_predicate_function.h           | 115 ++++-
 be/src/exprs/hybrid_set.h                          | 492 ++++++++++++++++++---
 be/src/olap/in_list_predicate.h                    | 271 +++++++++---
 be/src/olap/predicate_creator.h                    |  13 +-
 be/src/runtime/primitive_type.h                    |   3 +
 be/src/vec/columns/column_dictionary.h             |   9 +-
 .../exec/format/parquet/vparquet_group_reader.cpp  |   4 +-
 be/src/vec/exprs/vdirect_in_predicate.h            |  30 +-
 be/src/vec/functions/in.h                          |  46 +-
 9 files changed, 778 insertions(+), 205 deletions(-)

diff --git a/be/src/exprs/create_predicate_function.h 
b/be/src/exprs/create_predicate_function.h
index cbb8a45a3b..5a6d66d1f6 100644
--- a/be/src/exprs/create_predicate_function.h
+++ b/be/src/exprs/create_predicate_function.h
@@ -30,7 +30,7 @@ namespace doris {
 class MinmaxFunctionTraits {
 public:
     using BasePtr = MinMaxFuncBase*;
-    template <PrimitiveType type>
+    template <PrimitiveType type, size_t N>
     static BasePtr get_function() {
         return new MinMaxNumFunc<typename 
PrimitiveTypeTraits<type>::CppType>();
     }
@@ -39,19 +39,29 @@ public:
 class HybridSetTraits {
 public:
     using BasePtr = HybridSetBase*;
-    template <PrimitiveType type>
+    template <PrimitiveType type, size_t N>
     static BasePtr get_function() {
         using CppType = typename PrimitiveTypeTraits<type>::CppType;
-        using Set =
-                std::conditional_t<std::is_same_v<CppType, StringRef>, 
StringSet, HybridSet<type>>;
-        return new Set();
+        if constexpr (N >= 1 && N <= 12) {
+            using Set = std::conditional_t<
+                    std::is_same_v<CppType, StringRef>, StringSet<>,
+                    HybridSet<type,
+                              FixedContainer<typename 
VecPrimitiveTypeTraits<type>::CppType, N>>>;
+            return new Set();
+        } else {
+            using Set = std::conditional_t<
+                    std::is_same_v<CppType, StringRef>, StringSet<>,
+                    HybridSet<type,
+                              DynamicContainer<typename 
VecPrimitiveTypeTraits<type>::CppType>>>;
+            return new Set();
+        }
     }
 };
 
 class BloomFilterTraits {
 public:
     using BasePtr = BloomFilterFuncBase*;
-    template <PrimitiveType type>
+    template <PrimitiveType type, size_t N>
     static BasePtr get_function() {
         return new BloomFilterFunc<type>();
     }
@@ -60,7 +70,7 @@ public:
 class BitmapFilterTraits {
 public:
     using BasePtr = BitmapFilterFuncBase*;
-    template <PrimitiveType type>
+    template <PrimitiveType type, size_t N>
     static BasePtr get_function() {
         return new BitmapFilterFunc<type>();
     }
@@ -69,9 +79,9 @@ public:
 template <class Traits>
 class PredicateFunctionCreator {
 public:
-    template <PrimitiveType type>
+    template <PrimitiveType type, size_t N = 0>
     static typename Traits::BasePtr create() {
-        return Traits::template get_function<type>();
+        return Traits::template get_function<type, N>();
     }
 };
 
@@ -94,20 +104,20 @@ public:
     M(TYPE_DECIMAL64)         \
     M(TYPE_DECIMAL128I)
 
-template <class Traits>
+template <class Traits, size_t N = 0>
 typename Traits::BasePtr create_predicate_function(PrimitiveType type) {
     using Creator = PredicateFunctionCreator<Traits>;
 
     switch (type) {
     case TYPE_BOOLEAN: {
-        return Creator::template create<TYPE_BOOLEAN>();
+        return Creator::template create<TYPE_BOOLEAN, N>();
     }
     case TYPE_DECIMALV2: {
-        return Creator::template create<TYPE_DECIMALV2>();
+        return Creator::template create<TYPE_DECIMALV2, N>();
     }
-#define M(NAME)                                  \
-    case NAME: {                                 \
-        return Creator::template create<NAME>(); \
+#define M(NAME)                                     \
+    case NAME: {                                    \
+        return Creator::template create<NAME, N>(); \
     }
         APPLY_FOR_PRIMTYPE(M)
 #undef M
@@ -142,8 +152,78 @@ inline auto create_minmax_filter(PrimitiveType type) {
     return create_predicate_function<MinmaxFunctionTraits>(type);
 }
 
+template <size_t N = 0>
 inline auto create_set(PrimitiveType type) {
-    return create_predicate_function<HybridSetTraits>(type);
+    return create_predicate_function<HybridSetTraits, N>(type);
+}
+
+inline auto create_set(PrimitiveType type, size_t size) {
+    if (size == 1) {
+        return create_set<1>(type);
+    } else if (size == 2) {
+        return create_set<2>(type);
+    } else if (size == 3) {
+        return create_set<3>(type);
+    } else if (size == 4) {
+        return create_set<4>(type);
+    } else if (size == 5) {
+        return create_set<5>(type);
+    } else if (size == 6) {
+        return create_set<6>(type);
+    } else if (size == 7) {
+        return create_set<7>(type);
+    } else if (size == 8) {
+        return create_set<8>(type);
+    } else if (size == 9) {
+        return create_set<9>(type);
+    } else if (size == 10) {
+        return create_set<10>(type);
+    } else if (size == 11) {
+        return create_set<11>(type);
+    } else if (size == 12) {
+        return create_set<12>(type);
+    } else {
+        return create_set(type);
+    }
+}
+
+template <size_t N = 0>
+inline HybridSetBase* create_string_value_set() {
+    if constexpr (N >= 1 && N <= 12) {
+        return new StringValueSet<FixedContainer<StringRef, N>>();
+    } else {
+        return new StringValueSet();
+    }
+}
+
+inline HybridSetBase* create_string_value_set(size_t size) {
+    if (size == 1) {
+        return create_string_value_set<1>();
+    } else if (size == 2) {
+        return create_string_value_set<2>();
+    } else if (size == 3) {
+        return create_string_value_set<3>();
+    } else if (size == 4) {
+        return create_string_value_set<4>();
+    } else if (size == 5) {
+        return create_string_value_set<5>();
+    } else if (size == 6) {
+        return create_string_value_set<6>();
+    } else if (size == 7) {
+        return create_string_value_set<7>();
+    } else if (size == 8) {
+        return create_string_value_set<8>();
+    } else if (size == 9) {
+        return create_string_value_set<9>();
+    } else if (size == 10) {
+        return create_string_value_set<10>();
+    } else if (size == 11) {
+        return create_string_value_set<11>();
+    } else if (size == 12) {
+        return create_string_value_set<12>();
+    } else {
+        return create_string_value_set();
+    }
 }
 
 inline auto create_bloom_filter(PrimitiveType type) {
@@ -183,7 +263,8 @@ template <PrimitiveType PT>
 ColumnPredicate* create_olap_column_predicate(uint32_t column_id,
                                               const 
std::shared_ptr<HybridSetBase>& filter, int,
                                               const TabletColumn* column = 
nullptr) {
-    return new InListPredicateBase<PT, PredicateType::IN_LIST>(column_id, 
filter, column->length());
+    return create_in_list_predicate<PT, PredicateType::IN_LIST>(column_id, 
filter,
+                                                                
column->length());
 }
 
 template <typename T>
diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h
index 25606b226e..7b7f041a81 100644
--- a/be/src/exprs/hybrid_set.h
+++ b/be/src/exprs/hybrid_set.h
@@ -23,10 +23,183 @@
 #include "runtime/decimalv2_value.h"
 #include "runtime/define_primitive_type.h"
 #include "runtime/primitive_type.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
 #include "vec/common/string_ref.h"
 
 namespace doris {
 
+/**
+ * Fixed-size container that can use SIMD to improve performance. Tests show a 
performance gain for 1 <= N <= 12.
+ * @tparam T Element Type
+ * @tparam N Fixed Number
+ */
+template <typename T, size_t N>
+class FixedContainer {
+public:
+    using Self = FixedContainer;
+    using ElementType = T;
+
+    class Iterator;
+
+    FixedContainer() : _size(0) { static_assert(N >= 1 && N <= 12); }
+
+    ~FixedContainer() = default;
+
+    void insert(const T& value) {
+        DCHECK(_size < N);
+        _data[_size++] = value;
+    }
+
+    void insert(Iterator begin, Iterator end) {
+        for (auto iter = begin; iter != end; ++iter) {
+            DCHECK(_size < N);
+            _data[_size++] = (*iter);
+        }
+    }
+
+    // Using '|' instead of '||' gives better performance in tests.
+    bool find(const T& value) const {
+        if constexpr (N == 1) {
+            return (value == _data[0]);
+        }
+        if constexpr (N == 2) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]);
+        }
+        if constexpr (N == 3) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]);
+        }
+        if constexpr (N == 4) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]);
+        }
+        if constexpr (N == 5) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]);
+        }
+        if constexpr (N == 6) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]) | (uint8_t)(value == _data[5]);
+        }
+        if constexpr (N == 7) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]) | (uint8_t)(value == _data[5]) 
|
+                   (uint8_t)(value == _data[6]);
+        }
+        if constexpr (N == 8) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]) | (uint8_t)(value == _data[5]) 
|
+                   (uint8_t)(value == _data[6]) | (uint8_t)(value == _data[7]);
+        }
+        if constexpr (N == 9) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]) | (uint8_t)(value == _data[5]) 
|
+                   (uint8_t)(value == _data[6]) | (uint8_t)(value == _data[7]) 
|
+                   (uint8_t)(value == _data[8]);
+        }
+        if constexpr (N == 10) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]) | (uint8_t)(value == _data[5]) 
|
+                   (uint8_t)(value == _data[6]) | (uint8_t)(value == _data[7]) 
|
+                   (uint8_t)(value == _data[8]) | (uint8_t)(value == _data[9]);
+        }
+        if constexpr (N == 11) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]) | (uint8_t)(value == _data[5]) 
|
+                   (uint8_t)(value == _data[6]) | (uint8_t)(value == _data[7]) 
|
+                   (uint8_t)(value == _data[8]) | (uint8_t)(value == _data[9]) 
|
+                   (uint8_t)(value == _data[10]);
+        }
+        if constexpr (N == 12) {
+            return (uint8_t)(value == _data[0]) | (uint8_t)(value == _data[1]) 
|
+                   (uint8_t)(value == _data[2]) | (uint8_t)(value == _data[3]) 
|
+                   (uint8_t)(value == _data[4]) | (uint8_t)(value == _data[5]) 
|
+                   (uint8_t)(value == _data[6]) | (uint8_t)(value == _data[7]) 
|
+                   (uint8_t)(value == _data[8]) | (uint8_t)(value == _data[9]) 
|
+                   (uint8_t)(value == _data[10]) | (uint8_t)(value == 
_data[11]);
+        }
+        CHECK(false) << "unreachable path";
+        return false;
+    }
+
+    size_t size() const { return _size; }
+
+    class Iterator {
+    public:
+        explicit Iterator(std::array<T, N>& data, size_t index) : _data(data), 
_index(index) {}
+        Iterator& operator++() {
+            ++_index;
+            return *this;
+        }
+        Iterator operator++(int) {
+            Iterator ret_val = *this;
+            ++(*this);
+            return ret_val;
+        }
+        bool operator==(Iterator other) const { return _index == other._index; 
}
+        bool operator!=(Iterator other) const { return !(*this == other); }
+        T& operator*() const { return _data[_index]; }
+
+        T* operator->() const { return &operator*(); }
+
+        // iterator traits
+        using iterator_category = std::forward_iterator_tag;
+        using difference_type = std::ptrdiff_t;
+        using value_type = T;
+        using pointer = T*;
+        using reference = T&;
+
+    private:
+        std::array<T, N>& _data;
+        size_t _index;
+    };
+    Iterator begin() { return Iterator(_data, 0); }
+    Iterator end() { return Iterator(_data, _size); }
+
+private:
+    std::array<T, N> _data;
+    size_t _size;
+};
+
+/**
+ * Dynamic Container uses phmap::flat_hash_set.
+ * @tparam T Element Type
+ */
+template <typename T>
+class DynamicContainer {
+public:
+    using Self = DynamicContainer;
+    using Iterator = typename phmap::flat_hash_set<T>::iterator;
+    using ElementType = T;
+
+    DynamicContainer() = default;
+    ~DynamicContainer() = default;
+
+    void insert(const T& value) { _set.insert(value); }
+
+    void insert(Iterator begin, Iterator end) { _set.insert(begin, end); }
+
+    bool find(const T& value) const { return _set.contains(value); }
+
+    Iterator begin() { return _set.begin(); }
+
+    Iterator end() { return _set.end(); }
+
+    size_t size() const { return _set.size(); }
+
+private:
+    phmap::flat_hash_set<T> _set;
+};
+
+// TODO: It may be better to change the void* parameters to template parameters.
 class HybridSetBase {
 public:
     HybridSetBase() = default;
@@ -37,18 +210,47 @@ public:
 
     virtual void insert_fixed_len(const char* data, const int* offsets, int 
number) = 0;
 
-    virtual void insert(HybridSetBase* set) = 0;
+    virtual void insert(HybridSetBase* set) {
+        HybridSetBase::IteratorBase* iter = set->begin();
+        while (iter->has_next()) {
+            const void* value = iter->get_value();
+            insert(value);
+            iter->next();
+        }
+    }
 
     virtual int size() = 0;
-    virtual bool find(const void* data) = 0;
+    virtual bool find(const void* data) const = 0;
     // use in vectorize execute engine
-    virtual bool find(const void* data, size_t) = 0;
+    virtual bool find(const void* data, size_t) const = 0;
 
-    virtual void find_fixed_len(const char* data, const uint8* nullmap, int 
number,
-                                uint8* results) {
+    virtual void find_fixed_len(const char* __restrict data, const uint8* 
__restrict null_map,
+                                int number, uint8* __restrict results) {
         LOG(FATAL) << "HybridSetBase not support find_fixed_len";
     }
 
+    virtual void find_batch(const doris::vectorized::IColumn& column, size_t 
rows,
+                            doris::vectorized::ColumnUInt8::Container& 
results) {
+        LOG(FATAL) << "HybridSetBase not support find_batch";
+    }
+
+    virtual void find_batch_negative(const doris::vectorized::IColumn& column, 
size_t rows,
+                                     
doris::vectorized::ColumnUInt8::Container& results) {
+        LOG(FATAL) << "HybridSetBase not support find_batch_negative";
+    }
+
+    virtual void find_batch_nullable(const doris::vectorized::IColumn& column, 
size_t rows,
+                                     const doris::vectorized::NullMap& 
null_map,
+                                     
doris::vectorized::ColumnUInt8::Container& results) {
+        LOG(FATAL) << "HybridSetBase not support find_batch_nullable";
+    }
+
+    virtual void find_batch_nullable_negative(const 
doris::vectorized::IColumn& column, size_t rows,
+                                              const 
doris::vectorized::NullMap& null_map,
+                                              
doris::vectorized::ColumnUInt8::Container& results) {
+        LOG(FATAL) << "HybridSetBase not support find_batch_nullable_negative";
+    }
+
     class IteratorBase {
     public:
         IteratorBase() = default;
@@ -61,10 +263,34 @@ public:
     virtual IteratorBase* begin() = 0;
 };
 
-template <PrimitiveType T>
+template <typename Type>
+const Type* check_and_get_hybrid_set(const HybridSetBase& column) {
+    return typeid_cast<const Type*>(&column);
+}
+
+template <typename Type>
+const Type* check_and_get_hybrid_set(const HybridSetBase* column) {
+    return typeid_cast<const Type*>(column);
+}
+
+template <typename Type>
+bool check_hybrid_set(const HybridSetBase& column) {
+    return check_and_get_hybrid_set<Type>(&column);
+}
+
+template <typename Type>
+bool check_hybrid_set(const HybridSetBase* column) {
+    return check_and_get_hybrid_set<Type>(column);
+}
+
+template <PrimitiveType T,
+          typename _ContainerType = DynamicContainer<typename 
VecPrimitiveTypeTraits<T>::CppType>,
+          typename _ColumnType = typename 
VecPrimitiveTypeTraits<T>::ColumnType>
 class HybridSet : public HybridSetBase {
 public:
-    using CppType = typename VecPrimitiveTypeTraits<T>::CppType;
+    using ContainerType = _ContainerType;
+    using ElementType = typename ContainerType::ElementType;
+    using ColumnType = _ColumnType;
 
     HybridSet() = default;
 
@@ -75,57 +301,98 @@ public:
             return;
         }
 
-        if constexpr (sizeof(CppType) >= 16) {
+        if constexpr (sizeof(ElementType) >= 16) {
             // for large int, it will core dump with no memcpy
-            CppType value;
-            memcpy(&value, data, sizeof(CppType));
+            ElementType value;
+            memcpy(&value, data, sizeof(ElementType));
             _set.insert(value);
         } else {
-            _set.insert(*reinterpret_cast<const CppType*>(data));
+            _set.insert(*reinterpret_cast<const ElementType*>(data));
         }
     }
     void insert(void* data, size_t) override { insert(data); }
 
     void insert_fixed_len(const char* data, const int* offsets, int number) 
override {
         for (int i = 0; i < number; i++) {
-            insert((void*)((CppType*)data + offsets[i]));
+            insert((void*)((ElementType*)data + offsets[i]));
         }
     }
 
-    void insert(HybridSetBase* set) override {
-        HybridSet<T>* hybrid_set = reinterpret_cast<HybridSet<T>*>(set);
-        _set.insert(hybrid_set->_set.begin(), hybrid_set->_set.end());
-    }
-
     int size() override { return _set.size(); }
 
-    bool find(const void* data) override {
+    bool find(const void* data) const override {
         if (data == nullptr) {
             return false;
         }
 
-        auto it = _set.find(*reinterpret_cast<const CppType*>(data));
-        return !(it == _set.end());
+        return _set.find(*reinterpret_cast<const ElementType*>(data));
     }
 
-    bool find(const void* data, size_t) override { return find(data); }
+    bool find(const void* data, size_t) const override { return find(data); }
 
-    void find_fixed_len(const char* data, const uint8* nullmap, int number,
-                        uint8* results) override {
-        for (int i = 0; i < number; i++) {
-            if (nullmap != nullptr && nullmap[i]) {
-                results[i] = false;
-            } else {
-                results[i] = _set.count(*((CppType*)data + i));
+    void find_fixed_len(const char* __restrict data, const uint8* __restrict 
null_map, int number,
+                        uint8* __restrict results) override {
+        ElementType* value = (ElementType*)data;
+        if (null_map == nullptr) {
+            for (int i = 0; i < number; i++) {
+                results[i] = _set.find(value[i]);
+            }
+        } else {
+            for (int i = 0; i < number; i++) {
+                results[i] = _set.find(value[i]) & !null_map[i];
+            }
+        }
+    }
+
+    void find_batch(const doris::vectorized::IColumn& column, size_t rows,
+                    doris::vectorized::ColumnUInt8::Container& results) 
override {
+        _find_batch<false, false>(column, rows, nullptr, results);
+    }
+
+    void find_batch_negative(const doris::vectorized::IColumn& column, size_t 
rows,
+                             doris::vectorized::ColumnUInt8::Container& 
results) override {
+        _find_batch<false, true>(column, rows, nullptr, results);
+    }
+
+    void find_batch_nullable(const doris::vectorized::IColumn& column, size_t 
rows,
+                             const doris::vectorized::NullMap& null_map,
+                             doris::vectorized::ColumnUInt8::Container& 
results) override {
+        _find_batch<true, false>(column, rows, &null_map, results);
+    }
+
+    void find_batch_nullable_negative(const doris::vectorized::IColumn& 
column, size_t rows,
+                                      const doris::vectorized::NullMap& 
null_map,
+                                      
doris::vectorized::ColumnUInt8::Container& results) override {
+        _find_batch<true, true>(column, rows, &null_map, results);
+    }
+
+    template <bool is_nullable, bool is_negative>
+    void _find_batch(const doris::vectorized::IColumn& column, size_t rows,
+                     const doris::vectorized::NullMap* null_map,
+                     doris::vectorized::ColumnUInt8::Container& results) {
+        auto& col = assert_cast<const ColumnType&>(column);
+        const auto* __restrict data = (ElementType*)col.get_data().data();
+        const uint8_t* __restrict null_map_data;
+        if constexpr (is_nullable) {
+            null_map_data = null_map->data();
+        }
+        auto* __restrict result_data = results.data();
+        for (size_t i = 0; i < rows; ++i) {
+            if constexpr (!is_nullable && !is_negative) {
+                result_data[i] = _set.find(data[i]);
+            } else if constexpr (!is_nullable && is_negative) {
+                result_data[i] = !_set.find(data[i]);
+            } else if constexpr (is_nullable && !is_negative) {
+                result_data[i] = _set.find(data[i]) & (!null_map_data[i]);
+            } else { // (is_nullable && is_negative)
+                result_data[i] = !(_set.find(data[i]) & (!null_map_data[i]));
             }
         }
     }
 
-    template <class _iT>
     class Iterator : public IteratorBase {
     public:
-        Iterator(typename phmap::flat_hash_set<_iT>::iterator begin,
-                 typename phmap::flat_hash_set<_iT>::iterator end)
+        Iterator(typename ContainerType::Iterator begin, typename 
ContainerType::Iterator end)
                 : _begin(begin), _end(end) {}
         ~Iterator() override = default;
         bool has_next() const override { return !(_begin == _end); }
@@ -133,23 +400,26 @@ public:
         void next() override { ++_begin; }
 
     private:
-        typename phmap::flat_hash_set<_iT>::iterator _begin;
-        typename phmap::flat_hash_set<_iT>::iterator _end;
+        typename ContainerType::Iterator _begin;
+        typename ContainerType::Iterator _end;
     };
 
     IteratorBase* begin() override {
-        return _pool.add(new (std::nothrow) Iterator<CppType>(_set.begin(), 
_set.end()));
+        return _pool.add(new (std::nothrow) Iterator(_set.begin(), 
_set.end()));
     }
 
-    phmap::flat_hash_set<CppType>* get_inner_set() { return &_set; }
+    ContainerType* get_inner_set() { return &_set; }
 
 private:
-    phmap::flat_hash_set<CppType> _set;
+    ContainerType _set;
     ObjectPool _pool;
 };
 
+template <typename _ContainerType = DynamicContainer<std::string>>
 class StringSet : public HybridSetBase {
 public:
+    using ContainerType = _ContainerType;
+
     StringSet() = default;
 
     ~StringSet() override = default;
@@ -173,35 +443,72 @@ public:
         LOG(FATAL) << "string set not support insert_fixed_len";
     }
 
-    void insert(HybridSetBase* set) override {
-        StringSet* string_set = reinterpret_cast<StringSet*>(set);
-        _set.insert(string_set->_set.begin(), string_set->_set.end());
-    }
-
     int size() override { return _set.size(); }
 
-    bool find(const void* data) override {
+    bool find(const void* data) const override {
         if (data == nullptr) {
             return false;
         }
 
         auto* value = reinterpret_cast<const StringRef*>(data);
-        std::string_view str_value(const_cast<const char*>(value->data), 
value->size);
-        auto it = _set.find(str_value);
-
-        return !(it == _set.end());
+        std::string str_value(const_cast<const char*>(value->data), 
value->size);
+        return _set.find(str_value);
     }
 
-    bool find(const void* data, size_t size) override {
+    bool find(const void* data, size_t size) const override {
         std::string str_value(reinterpret_cast<const char*>(data), size);
-        auto it = _set.find(str_value);
-        return !(it == _set.end());
+        return _set.find(str_value);
+    }
+
+    void find_batch(const doris::vectorized::IColumn& column, size_t rows,
+                    doris::vectorized::ColumnUInt8::Container& results) 
override {
+        _find_batch<false, false>(column, rows, nullptr, results);
+    }
+
+    void find_batch_negative(const doris::vectorized::IColumn& column, size_t 
rows,
+                             doris::vectorized::ColumnUInt8::Container& 
results) override {
+        _find_batch<false, true>(column, rows, nullptr, results);
+    }
+
+    void find_batch_nullable(const doris::vectorized::IColumn& column, size_t 
rows,
+                             const doris::vectorized::NullMap& null_map,
+                             doris::vectorized::ColumnUInt8::Container& 
results) override {
+        _find_batch<true, false>(column, rows, &null_map, results);
+    }
+
+    void find_batch_nullable_negative(const doris::vectorized::IColumn& 
column, size_t rows,
+                                      const doris::vectorized::NullMap& 
null_map,
+                                      
doris::vectorized::ColumnUInt8::Container& results) override {
+        _find_batch<true, true>(column, rows, &null_map, results);
+    }
+
+    template <bool is_nullable, bool is_negative>
+    void _find_batch(const doris::vectorized::IColumn& column, size_t rows,
+                     const doris::vectorized::NullMap* null_map,
+                     doris::vectorized::ColumnUInt8::Container& results) {
+        auto& col = assert_cast<const 
doris::vectorized::ColumnString&>(column);
+        const uint8_t* __restrict null_map_data;
+        if constexpr (is_nullable) {
+            null_map_data = null_map->data();
+        }
+        auto* __restrict result_data = results.data();
+        for (size_t i = 0; i < rows; ++i) {
+            const auto& string_data = col.get_data_at(i).to_string();
+            if constexpr (!is_nullable && !is_negative) {
+                result_data[i] = _set.find(string_data);
+            } else if constexpr (!is_nullable && is_negative) {
+                result_data[i] = !_set.find(string_data);
+            } else if constexpr (is_nullable && !is_negative) {
+                result_data[i] = _set.find(string_data) & (!null_map_data[i]);
+            } else { // (is_nullable && is_negative)
+                result_data[i] = !(_set.find(string_data) & 
(!null_map_data[i]));
+            }
+        }
     }
 
     class Iterator : public IteratorBase {
     public:
-        Iterator(phmap::flat_hash_set<std::string>::iterator begin,
-                 phmap::flat_hash_set<std::string>::iterator end)
+        Iterator(typename ContainerType::Iterator begin, typename 
ContainerType::Iterator end)
                 : _begin(begin), _end(end) {}
         ~Iterator() override = default;
         bool has_next() const override { return !(_begin == _end); }
@@ -213,8 +520,8 @@ public:
         void next() override { ++_begin; }
 
     private:
-        typename phmap::flat_hash_set<std::string>::iterator _begin;
-        typename phmap::flat_hash_set<std::string>::iterator _end;
+        typename ContainerType::Iterator _begin;
+        typename ContainerType::Iterator _end;
         StringRef _value;
     };
 
@@ -222,18 +529,21 @@ public:
         return _pool.add(new (std::nothrow) Iterator(_set.begin(), 
_set.end()));
     }
 
-    phmap::flat_hash_set<std::string>* get_inner_set() { return &_set; }
+    ContainerType* get_inner_set() { return &_set; }
 
 private:
-    phmap::flat_hash_set<std::string> _set;
+    ContainerType _set;
     ObjectPool _pool;
 };
 
 // note: Two differences from StringSet
 // 1 StringRef has better comparison performance than std::string
 // 2 std::string keeps its own memory, but StringRef just keeps ptr and len, 
so the caller should manage the memory of StringRef
+template <typename _ContainerType = DynamicContainer<StringRef>>
 class StringValueSet : public HybridSetBase {
 public:
+    using ContainerType = _ContainerType;
+
     StringValueSet() = default;
 
     ~StringValueSet() override = default;
@@ -257,38 +567,74 @@ public:
         LOG(FATAL) << "string set not support insert_fixed_len";
     }
 
-    void insert(HybridSetBase* set) override {
-        StringValueSet* string_set = reinterpret_cast<StringValueSet*>(set);
-        _set.insert(string_set->_set.begin(), string_set->_set.end());
-    }
-
     int size() override { return _set.size(); }
 
-    bool find(const void* data) override {
+    bool find(const void* data) const override {
         if (data == nullptr) {
             return false;
         }
 
         auto* value = reinterpret_cast<const StringRef*>(data);
-        auto it = _set.find(*value);
-
-        return !(it == _set.end());
+        return _set.find(*value);
     }
 
-    bool find(const void* data, size_t size) override {
+    bool find(const void* data, size_t size) const override {
         if (data == nullptr) {
             return false;
         }
 
         StringRef sv(reinterpret_cast<const char*>(data), size);
-        auto it = _set.find(sv);
-        return !(it == _set.end());
+        return _set.find(sv);
+    }
+
+    void find_batch(const doris::vectorized::IColumn& column, size_t rows,
+                    doris::vectorized::ColumnUInt8::Container& results) 
override {
+        _find_batch<false, false>(column, rows, nullptr, results);
+    }
+
+    void find_batch_negative(const doris::vectorized::IColumn& column, size_t 
rows,
+                             doris::vectorized::ColumnUInt8::Container& 
results) override {
+        _find_batch<false, true>(column, rows, nullptr, results);
+    }
+
+    void find_batch_nullable(const doris::vectorized::IColumn& column, size_t 
rows,
+                             const doris::vectorized::NullMap& null_map,
+                             doris::vectorized::ColumnUInt8::Container& 
results) override {
+        _find_batch<true, false>(column, rows, &null_map, results);
+    }
+
+    void find_batch_nullable_negative(const doris::vectorized::IColumn& 
column, size_t rows,
+                                      const doris::vectorized::NullMap& 
null_map,
+                                      
doris::vectorized::ColumnUInt8::Container& results) override {
+        _find_batch<true, true>(column, rows, &null_map, results);
+    }
+
+    template <bool is_nullable, bool is_negative>
+    void _find_batch(const doris::vectorized::IColumn& column, size_t rows,
+                     const doris::vectorized::NullMap* null_map,
+                     doris::vectorized::ColumnUInt8::Container& results) {
+        auto& col = assert_cast<const 
doris::vectorized::ColumnString&>(column);
+        const uint8_t* __restrict null_map_data;
+        if constexpr (is_nullable) {
+            null_map_data = null_map->data();
+        }
+        auto* __restrict result_data = results.data();
+        for (size_t i = 0; i < rows; ++i) {
+            if constexpr (!is_nullable && !is_negative) {
+                result_data[i] = _set.find(col.get_data_at(i));
+            } else if constexpr (!is_nullable && is_negative) {
+                result_data[i] = !_set.find(col.get_data_at(i));
+            } else if constexpr (is_nullable && !is_negative) {
+                result_data[i] = _set.find(col.get_data_at(i)) & 
(!null_map_data[i]);
+            } else { // (is_nullable && is_negative)
+                result_data[i] = !(_set.find(col.get_data_at(i)) & 
(!null_map_data[i]));
+            }
+        }
     }
 
     class Iterator : public IteratorBase {
     public:
-        Iterator(phmap::flat_hash_set<StringRef>::iterator begin,
-                 phmap::flat_hash_set<StringRef>::iterator end)
+        Iterator(typename ContainerType::Iterator begin, typename 
ContainerType::Iterator end)
                 : _begin(begin), _end(end) {}
         ~Iterator() override = default;
         bool has_next() const override { return !(_begin == _end); }
@@ -300,8 +646,8 @@ public:
         void next() override { ++_begin; }
 
     private:
-        typename phmap::flat_hash_set<StringRef>::iterator _begin;
-        typename phmap::flat_hash_set<StringRef>::iterator _end;
+        typename ContainerType::Iterator _begin;
+        typename ContainerType::Iterator _end;
         StringRef _value;
     };
 
@@ -309,10 +655,10 @@ public:
         return _pool.add(new (std::nothrow) Iterator(_set.begin(), 
_set.end()));
     }
 
-    phmap::flat_hash_set<StringRef>* get_inner_set() { return &_set; }
+    ContainerType* get_inner_set() { return &_set; }
 
 private:
-    phmap::flat_hash_set<StringRef> _set;
+    ContainerType _set;
     ObjectPool _pool;
 };
 
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 182abc25a6..bdb4553b62 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -75,7 +75,13 @@ struct std::equal_to<doris::uint24_t> {
 
 namespace doris {
 
-template <PrimitiveType Type, PredicateType PT>
+/**
+ * Use HybridSetType can avoid virtual function call in the loop.
+ * @tparam Type
+ * @tparam PT
+ * @tparam HybridSetType
+ */
+template <PrimitiveType Type, PredicateType PT, typename HybridSetType>
 class InListPredicateBase : public ColumnPredicate {
 public:
     using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
@@ -84,7 +90,7 @@ public:
                         const ConvertFunc& convert, bool is_opposite = false,
                         const TabletColumn* col = nullptr, MemPool* pool = 
nullptr)
             : ColumnPredicate(column_id, is_opposite),
-              _values(new phmap::flat_hash_set<T>()),
+              _values(new HybridSetType()),
               _min_value(type_limit<T>::max()),
               _max_value(type_limit<T>::min()) {
         for (const auto& condition : conditions) {
@@ -97,7 +103,7 @@ public:
             } else {
                 tmp = convert(condition);
             }
-            _values->insert(tmp);
+            _values->insert(&tmp);
             _update_min_max(tmp);
         }
     }
@@ -107,50 +113,69 @@ public:
             : ColumnPredicate(column_id, false),
               _min_value(type_limit<T>::max()),
               _max_value(type_limit<T>::min()) {
-        using HybridSetType = std::conditional_t<is_string_type(Type), 
StringSet, HybridSet<Type>>;
-
         CHECK(hybrid_set != nullptr);
 
         if constexpr (is_string_type(Type) || Type == TYPE_DECIMALV2 || 
is_date_type(Type)) {
-            _values = new phmap::flat_hash_set<T>();
-            auto values = ((HybridSetType*)hybrid_set.get())->get_inner_set();
+            _values = new HybridSetType();
 
             if constexpr (is_string_type(Type)) {
-                // values' type is "phmap::flat_hash_set<std::string>"
-                for (const std::string& value : *values) {
-                    StringRef sv = value;
+                HybridSetBase::IteratorBase* iter = hybrid_set->begin();
+                while (iter->has_next()) {
+                    const StringRef* value = (const 
StringRef*)(iter->get_value());
                     if constexpr (Type == TYPE_CHAR) {
                         _temp_datas.push_back("");
-                        _temp_datas.back().resize(std::max(char_length, 
value.size()));
-                        memcpy(_temp_datas.back().data(), value.data(), 
value.size());
-                        sv = _temp_datas.back();
+                        _temp_datas.back().resize(std::max(char_length, 
value->size));
+                        memcpy(_temp_datas.back().data(), value->data, 
value->size);
+                        const string& str = _temp_datas.back();
+                        _values->insert((void*)str.data(), str.length());
+                    } else {
+                        _values->insert((void*)value->data, value->size);
                     }
-                    _values->insert(sv);
+                    iter->next();
                 }
             } else if constexpr (Type == TYPE_DECIMALV2) {
-                for (auto& value : *values) {
-                    _values->insert({value.int_value(), value.frac_value()});
+                HybridSetBase::IteratorBase* iter = hybrid_set->begin();
+                while (iter->has_next()) {
+                    const DecimalV2Value* value = (const 
DecimalV2Value*)(iter->get_value());
+                    decimal12_t decimal12 = {value->int_value(), 
value->frac_value()};
+                    _values->insert(&decimal12);
+                    iter->next();
                 }
             } else if constexpr (Type == TYPE_DATE) {
-                for (auto& value : *values) {
-                    _values->insert(value.to_olap_date());
+                HybridSetBase::IteratorBase* iter = hybrid_set->begin();
+                while (iter->has_next()) {
+                    const vectorized::VecDateTimeValue* value =
+                            (const 
vectorized::VecDateTimeValue*)(iter->get_value());
+                    uint64_t date = value->to_olap_date();
+                    _values->insert(&date);
+                    iter->next();
                 }
             } else if constexpr (Type == TYPE_DATETIME) {
-                for (auto& value : *values) {
-                    _values->insert(value.to_olap_datetime());
+                HybridSetBase::IteratorBase* iter = hybrid_set->begin();
+                while (iter->has_next()) {
+                    const vectorized::VecDateTimeValue* value =
+                            (const 
vectorized::VecDateTimeValue*)(iter->get_value());
+                    uint64_t date_time = value->to_olap_datetime();
+                    _values->insert(&date_time);
+                    iter->next();
                 }
             } else {
-                CHECK(Type == TYPE_DATETIMEV2 || Type == TYPE_DATEV2);
-                for (auto& value : *values) {
-                    _values->insert(T(value));
+                HybridSetBase::IteratorBase* iter = hybrid_set->begin();
+                while (iter->has_next()) {
+                    const void* value = iter->get_value();
+                    _values->insert(value);
+                    iter->next();
                 }
+                CHECK(Type == TYPE_DATETIMEV2 || Type == TYPE_DATEV2);
             }
         } else {
-            _values = ((HybridSetType*)hybrid_set.get())->get_inner_set();
+            _values = reinterpret_cast<HybridSetType*>(hybrid_set.get());
         }
-
-        for (auto& value : *_values) {
-            _update_min_max(value);
+        HybridSetBase::IteratorBase* iter = _values->begin();
+        while (iter->has_next()) {
+            const T* value = (const T*)(iter->get_value());
+            _update_min_max(*value);
+            iter->next();
         }
     }
 
@@ -173,9 +198,11 @@ public:
             *result -= null_bitmap;
         }
         roaring::Roaring indices;
-        for (auto value : *_values) {
+        HybridSetBase::IteratorBase* iter = _values->begin();
+        while (iter->has_next()) {
+            const void* value = iter->get_value();
             bool exact_match;
-            Status s = iterator->seek_dictionary(&value, &exact_match);
+            Status s = iterator->seek_dictionary(value, &exact_match);
             rowid_t seeked_ordinal = iterator->current_ordinal();
             if (!s.is<ErrorCode::NOT_FOUND>()) {
                 if (!s.ok()) {
@@ -187,6 +214,7 @@ public:
                     indices |= index;
                 }
             }
+            iter->next();
         }
 
         if constexpr (PT == PredicateType::IN_LIST) {
@@ -206,12 +234,15 @@ public:
         auto column_desc = schema.column(_column_id);
         std::string column_name = column_desc->name();
         roaring::Roaring indices;
-        for (auto value : *_values) {
+        HybridSetBase::IteratorBase* iter = _values->begin();
+        while (iter->has_next()) {
+            const void* value = iter->get_value();
             InvertedIndexQueryType query_type = 
InvertedIndexQueryType::EQUAL_QUERY;
             roaring::Roaring index;
-            RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, 
&value, query_type,
+            RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, 
value, query_type,
                                                                num_rows, 
&index));
             indices |= index;
+            iter->next();
         }
         if constexpr (PT == PredicateType::IN_LIST) {
             *result &= indices;
@@ -226,15 +257,15 @@ public:
         if (column.is_nullable()) {
             auto* nullable_col =
                     
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
-            auto& null_bitmap = reinterpret_cast<const 
vectorized::ColumnUInt8&>(
-                                        nullable_col->get_null_map_column())
-                                        .get_data();
+            auto& null_map = reinterpret_cast<const vectorized::ColumnUInt8&>(
+                                     nullable_col->get_null_map_column())
+                                     .get_data();
             auto& nested_col = nullable_col->get_nested_column();
 
             if (_opposite) {
-                return _base_evaluate<true, true>(&nested_col, &null_bitmap, 
sel, size);
+                return _base_evaluate<true, true>(&nested_col, &null_map, sel, 
size);
             } else {
-                return _base_evaluate<true, false>(&nested_col, &null_bitmap, 
sel, size);
+                return _base_evaluate<true, false>(&nested_col, &null_map, 
sel, size);
             }
         } else {
             if (_opposite) {
@@ -328,20 +359,25 @@ public:
 
     bool evaluate_and(const segment_v2::BloomFilter* bf) const override {
         if constexpr (PT == PredicateType::IN_LIST) {
-            for (auto value : *_values) {
+            HybridSetBase::IteratorBase* iter = _values->begin();
+            while (iter->has_next()) {
                 if constexpr (std::is_same_v<T, StringRef>) {
-                    if (bf->test_bytes(value.data, value.size)) {
+                    const StringRef* value = (const 
StringRef*)iter->get_value();
+                    if (bf->test_bytes(value->data, value->size)) {
                         return true;
                     }
                 } else if constexpr (Type == TYPE_DATE) {
-                    if (bf->test_bytes(reinterpret_cast<char*>(&value), 
sizeof(uint24_t))) {
+                    const void* value = iter->get_value();
+                    if (bf->test_bytes(reinterpret_cast<const char*>(value), 
sizeof(uint24_t))) {
                         return true;
                     }
                 } else {
-                    if (bf->test_bytes(reinterpret_cast<char*>(&value), 
sizeof(value))) {
+                    const T* value = (const T*)(iter->get_value());
+                    if (bf->test_bytes(reinterpret_cast<const char*>(value), 
sizeof(*value))) {
                         return true;
                     }
                 }
+                iter->next();
             }
             return false;
         } else {
@@ -355,13 +391,11 @@ public:
 private:
     template <typename LeftT, typename RightT>
     bool _operator(const LeftT& lhs, const RightT& rhs) const {
-        if constexpr (Type == TYPE_BOOLEAN) {
-            DCHECK(_values->size() == 2);
-            return PT == PredicateType::IN_LIST;
-        } else if constexpr (PT == PredicateType::IN_LIST) {
+        if constexpr (PT == PredicateType::IN_LIST) {
             return lhs != rhs;
+        } else {
+            return lhs == rhs;
         }
-        return lhs == rhs;
     }
 
     template <bool is_nullable, bool is_opposite>
@@ -379,7 +413,7 @@ private:
                 DCHECK((segid.first.hi | segid.first.mi | segid.first.lo) != 
0);
                 auto& value_in_dict_flags = 
_segment_id_to_value_in_dict_flags[segid];
                 if (value_in_dict_flags.empty()) {
-                    nested_col_ptr->find_codes(*_values, value_in_dict_flags);
+                    nested_col_ptr->find_codes(_values, value_in_dict_flags);
                 }
 
                 CHECK(value_in_dict_flags.size() == 
nested_col_ptr->dict_size())
@@ -429,19 +463,18 @@ private:
                 }
 
                 if constexpr (!is_opposite) {
-                    if (_operator(_values->find(reinterpret_cast<const 
T&>(data_array[idx])),
-                                  _values->end())) {
+                    if (_operator(_values->find(reinterpret_cast<const 
T*>(&data_array[idx])),
+                                  false)) {
                         sel[new_size++] = idx;
                     }
                 } else {
-                    if (!_operator(_values->find(reinterpret_cast<const 
T&>(data_array[idx])),
-                                   _values->end())) {
+                    if (!_operator(_values->find(reinterpret_cast<const 
T*>(&data_array[idx])),
+                                   false)) {
                         sel[new_size++] = idx;
                     }
                 }
             }
         }
-
         return new_size;
     }
 
@@ -457,7 +490,7 @@ private:
                 auto& value_in_dict_flags =
                         
_segment_id_to_value_in_dict_flags[column->get_rowset_segment_id()];
                 if (value_in_dict_flags.empty()) {
-                    nested_col_ptr->find_codes(*_values, value_in_dict_flags);
+                    nested_col_ptr->find_codes(_values, value_in_dict_flags);
                 }
 
                 for (uint16_t i = 0; i < size; i++) {
@@ -509,14 +542,14 @@ private:
 
                 if constexpr (!is_opposite) {
                     if (is_and ^
-                        _operator(_values->find(reinterpret_cast<const 
T&>(data_array[idx])),
-                                  _values->end())) {
+                        _operator(_values->find(reinterpret_cast<const 
T*>(&data_array[idx])),
+                                  false)) {
                         flags[i] = !is_and;
                     }
                 } else {
                     if (is_and ^
-                        !_operator(_values->find(reinterpret_cast<const 
T&>(data_array[idx])),
-                                   _values->end())) {
+                        !_operator(_values->find(reinterpret_cast<const 
T*>(&data_array[idx])),
+                                   false)) {
                         flags[i] = !is_and;
                     }
                 }
@@ -539,7 +572,7 @@ private:
         }
     }
 
-    phmap::flat_hash_set<T>* _values;
+    HybridSetType* _values;
     mutable std::map<std::pair<RowsetId, uint32_t>, 
std::vector<vectorized::UInt8>>
             _segment_id_to_value_in_dict_flags;
     T _min_value;
@@ -549,4 +582,128 @@ private:
     std::list<std::string> _temp_datas;
 };
 
+template <PrimitiveType Type, PredicateType PT, typename ConditionType, 
typename ConvertFunc,
+          size_t N = 0>
+ColumnPredicate* _create_in_list_predicate(uint32_t column_id, const 
ConditionType& conditions,
+                                           const ConvertFunc& convert, bool 
is_opposite = false,
+                                           const TabletColumn* col = nullptr,
+                                           MemPool* pool = nullptr) {
+    using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
+    if constexpr (N >= 1 && N <= 12) {
+        using Set = std::conditional_t<
+                std::is_same_v<T, StringRef>, 
StringSet<FixedContainer<std::string, N>>,
+                HybridSet<Type, FixedContainer<T, N>,
+                          
vectorized::PredicateColumnType<PredicateEvaluateType<Type>>>>;
+        return new InListPredicateBase<Type, PT, Set>(column_id, conditions, 
convert, is_opposite,
+                                                      col, pool);
+    } else {
+        using Set = std::conditional_t<
+                std::is_same_v<T, StringRef>, 
StringSet<DynamicContainer<std::string>>,
+                HybridSet<Type, DynamicContainer<T>,
+                          
vectorized::PredicateColumnType<PredicateEvaluateType<Type>>>>;
+        return new InListPredicateBase<Type, PT, Set>(column_id, conditions, 
convert, is_opposite,
+                                                      col, pool);
+    }
+}
+
+template <PrimitiveType Type, PredicateType PT, typename ConditionType, 
typename ConvertFunc>
+ColumnPredicate* create_in_list_predicate(uint32_t column_id, const 
ConditionType& conditions,
+                                          const ConvertFunc& convert, bool 
is_opposite = false,
+                                          const TabletColumn* col = nullptr,
+                                          MemPool* pool = nullptr) {
+    if (conditions.size() == 1) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
1>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 2) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
2>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 3) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
3>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 4) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
4>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 5) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
5>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 6) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
6>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 7) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
7>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 8) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
8>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 9) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
9>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 10) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
10>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 11) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
11>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else if (conditions.size() == 12) {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc, 
12>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    } else {
+        return _create_in_list_predicate<Type, PT, ConditionType, ConvertFunc>(
+                column_id, conditions, convert, is_opposite, col, pool);
+    }
+}
+
+template <PrimitiveType Type, PredicateType PT, size_t N = 0>
+ColumnPredicate* _create_in_list_predicate(uint32_t column_id,
+                                           const 
std::shared_ptr<HybridSetBase>& hybrid_set,
+                                           size_t char_length = 0) {
+    using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
+    if constexpr (N >= 1 && N <= 12) {
+        using Set = std::conditional_t<
+                std::is_same_v<T, StringRef>, 
StringSet<FixedContainer<std::string, N>>,
+                HybridSet<Type, FixedContainer<T, N>,
+                          
vectorized::PredicateColumnType<PredicateEvaluateType<Type>>>>;
+        return new InListPredicateBase<Type, PT, Set>(column_id, hybrid_set, 
char_length);
+    } else {
+        using Set = std::conditional_t<
+                std::is_same_v<T, StringRef>, 
StringSet<DynamicContainer<std::string>>,
+                HybridSet<Type, DynamicContainer<T>,
+                          
vectorized::PredicateColumnType<PredicateEvaluateType<Type>>>>;
+        return new InListPredicateBase<Type, PT, Set>(column_id, hybrid_set, 
char_length);
+    }
+}
+
+template <PrimitiveType Type, PredicateType PT>
+ColumnPredicate* create_in_list_predicate(uint32_t column_id,
+                                          const 
std::shared_ptr<HybridSetBase>& hybrid_set,
+                                          size_t char_length = 0) {
+    if (hybrid_set->size() == 1) {
+        return _create_in_list_predicate<Type, PT, 1>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 2) {
+        return _create_in_list_predicate<Type, PT, 2>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 3) {
+        return _create_in_list_predicate<Type, PT, 3>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 4) {
+        return _create_in_list_predicate<Type, PT, 4>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 5) {
+        return _create_in_list_predicate<Type, PT, 5>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 6) {
+        return _create_in_list_predicate<Type, PT, 6>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 7) {
+        return _create_in_list_predicate<Type, PT, 7>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 8) {
+        return _create_in_list_predicate<Type, PT, 8>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 9) {
+        return _create_in_list_predicate<Type, PT, 9>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 10) {
+        return _create_in_list_predicate<Type, PT, 10>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 11) {
+        return _create_in_list_predicate<Type, PT, 11>(column_id, hybrid_set, 
char_length);
+    } else if (hybrid_set->size() == 12) {
+        return _create_in_list_predicate<Type, PT, 12>(column_id, hybrid_set, 
char_length);
+    } else {
+        return _create_in_list_predicate<Type, PT>(column_id, hybrid_set, 
char_length);
+    }
+}
+
 } //namespace doris
diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h
index 754a1ad503..251dc2680a 100644
--- a/be/src/olap/predicate_creator.h
+++ b/be/src/olap/predicate_creator.h
@@ -53,7 +53,8 @@ public:
     ColumnPredicate* create(const TabletColumn& column, int index, const 
ConditionType& conditions,
                             bool opposite, MemPool* pool) override {
         if constexpr (PredicateTypeTraits::is_list(PT)) {
-            return new InListPredicateBase<Type, PT>(index, conditions, 
convert, opposite);
+            return create_in_list_predicate<Type, PT, ConditionType, 
decltype(convert)>(
+                    index, conditions, convert, opposite);
         } else {
             static_assert(PredicateTypeTraits::is_comparison(PT));
             return new ComparisonPredicateBase<Type, PT>(index, 
convert(conditions), opposite);
@@ -82,7 +83,8 @@ public:
     ColumnPredicate* create(const TabletColumn& column, int index, const 
ConditionType& conditions,
                             bool opposite, MemPool* pool) override {
         if constexpr (PredicateTypeTraits::is_list(PT)) {
-            return new InListPredicateBase<Type, PT>(index, conditions, 
convert, opposite, &column);
+            return create_in_list_predicate<Type, PT, ConditionType, 
decltype(convert)>(
+                    index, conditions, convert, opposite, &column);
         } else {
             static_assert(PredicateTypeTraits::is_comparison(PT));
             return new ComparisonPredicateBase<Type, PT>(index, 
convert(column, conditions),
@@ -105,8 +107,8 @@ public:
     ColumnPredicate* create(const TabletColumn& column, int index, const 
ConditionType& conditions,
                             bool opposite, MemPool* pool) override {
         if constexpr (PredicateTypeTraits::is_list(PT)) {
-            return new InListPredicateBase<Type, PT>(index, conditions, 
convert, opposite, &column,
-                                                     pool);
+            return create_in_list_predicate<Type, PT, ConditionType, 
decltype(convert)>(
+                    index, conditions, convert, opposite, &column, pool);
         } else {
             static_assert(PredicateTypeTraits::is_comparison(PT));
             return new ComparisonPredicateBase<Type, PT>(index, 
convert(column, conditions, pool),
@@ -140,7 +142,8 @@ public:
     ColumnPredicate* create(const TabletColumn& column, int index, const 
ConditionType& conditions,
                             bool opposite, MemPool* pool) override {
         if constexpr (PredicateTypeTraits::is_list(PT)) {
-            return new InListPredicateBase<Type, PT>(index, conditions, 
_convert, opposite);
+            return create_in_list_predicate<Type, PT, ConditionType, 
decltype(_convert)>(
+                    index, conditions, _convert, opposite);
         } else {
             static_assert(PredicateTypeTraits::is_comparison(PT));
             return new ComparisonPredicateBase<Type, PT>(index, 
_convert(conditions), opposite);
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index aacb87a8f9..e125a4b4a2 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -268,16 +268,19 @@ struct PredicatePrimitiveTypeTraits<TYPE_DATETIMEV2> {
 template <PrimitiveType type>
 struct VecPrimitiveTypeTraits {
     using CppType = typename PrimitiveTypeTraits<type>::CppType;
+    using ColumnType = typename PrimitiveTypeTraits<type>::ColumnType;
 };
 
 template <>
 struct VecPrimitiveTypeTraits<TYPE_DATE> {
     using CppType = vectorized::VecDateTimeValue;
+    using ColumnType = vectorized::ColumnVector<vectorized::DateTime>;
 };
 
 template <>
 struct VecPrimitiveTypeTraits<TYPE_DATETIME> {
     using CppType = vectorized::VecDateTimeValue;
+    using ColumnType = vectorized::ColumnVector<vectorized::DateTime>;
 };
 
 } // namespace doris
diff --git a/be/src/vec/columns/column_dictionary.h 
b/be/src/vec/columns/column_dictionary.h
index 9e1c25e69f..29fd33f5b1 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -289,8 +289,8 @@ public:
 
     uint32_t get_hash_value(uint32_t idx) const { return 
_dict.get_hash_value(_codes[idx], _type); }
 
-    void find_codes(const phmap::flat_hash_set<StringRef>& values,
-                    std::vector<vectorized::UInt8>& selected) const {
+    template <typename HybridSetType>
+    void find_codes(const HybridSetType* values, 
std::vector<vectorized::UInt8>& selected) const {
         return _dict.find_codes(values, selected);
     }
 
@@ -423,13 +423,14 @@ public:
             return greater ? bound - greater + eq : bound - eq;
         }
 
-        void find_codes(const phmap::flat_hash_set<StringRef>& values,
+        template <typename HybridSetType>
+        void find_codes(const HybridSetType* values,
                         std::vector<vectorized::UInt8>& selected) const {
             size_t dict_word_num = _dict_data->size();
             selected.resize(dict_word_num);
             selected.assign(dict_word_num, false);
             for (size_t i = 0; i < _dict_data->size(); i++) {
-                if (values.find((*_dict_data)[i]) != values.end()) {
+                if (values->find(&((*_dict_data)[i]))) {
                     selected[i] = true;
                 }
             }
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 1b1140cc5e..a25cce7171 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -157,7 +157,6 @@ bool RowGroupReader::_can_filter_by_dict(int slot_id,
             }
         }
     }
-
     return true;
 }
 // This function is copied from
@@ -844,7 +843,8 @@ Status 
RowGroupReader::_rewrite_dict_conjuncts(std::vector<int32_t>& dict_codes,
             node.__set_is_nullable(false);
 
             root = _obj_pool->add(new vectorized::VDirectInPredicate(node));
-            std::shared_ptr<HybridSetBase> 
hybrid_set(create_set(PrimitiveType::TYPE_INT));
+            std::shared_ptr<HybridSetBase> hybrid_set(
+                    create_set(PrimitiveType::TYPE_INT, dict_codes.size()));
             for (int j = 0; j < dict_codes.size(); ++j) {
                 hybrid_set->insert(&dict_codes[j]);
             }
diff --git a/be/src/vec/exprs/vdirect_in_predicate.h 
b/be/src/vec/exprs/vdirect_in_predicate.h
index b717f00d18..b9321a5c2e 100644
--- a/be/src/vec/exprs/vdirect_in_predicate.h
+++ b/be/src/vec/exprs/vdirect_in_predicate.h
@@ -43,30 +43,14 @@ public:
         size_t sz = argument_column->size();
         res_data_column->resize(sz);
 
-        auto ptr = 
((ColumnVector<UInt8>*)res_data_column.get())->get_data().data();
-        auto type = 
WhichDataType(remove_nullable(block->get_by_position(arguments[0]).type));
-        if (type.is_string_or_fixed_string()) {
-            for (size_t i = 0; i < sz; i++) {
-                auto ele = argument_column->get_data_at(i);
-                StringRef v(ele.data, ele.size);
-                ptr[i] = _filter->find(reinterpret_cast<const void*>(&v));
-            }
-        } else if (type.is_int_or_uint() || type.is_float()) {
-            if (argument_column->is_nullable()) {
-                auto column_nested = reinterpret_cast<const 
ColumnNullable*>(argument_column.get())
-                                             ->get_nested_column_ptr();
-                auto column_nullmap = reinterpret_cast<const 
ColumnNullable*>(argument_column.get())
-                                              ->get_null_map_column_ptr();
-                _filter->find_fixed_len(column_nested->get_raw_data().data,
-                                        
(uint8*)column_nullmap->get_raw_data().data, sz, ptr);
-            } else {
-                _filter->find_fixed_len(argument_column->get_raw_data().data, 
nullptr, sz, ptr);
-            }
+        if (argument_column->is_nullable()) {
+            auto column_nested = static_cast<const 
ColumnNullable*>(argument_column.get())
+                                         ->get_nested_column_ptr();
+            auto& null_map =
+                    static_cast<const 
ColumnNullable*>(argument_column.get())->get_null_map_data();
+            _filter->find_batch_nullable(*column_nested, sz, null_map, 
res_data_column->get_data());
         } else {
-            for (size_t i = 0; i < sz; i++) {
-                ptr[i] = _filter->find(
-                        reinterpret_cast<const 
void*>(argument_column->get_data_at(i).data));
-            }
+            _filter->find_batch(*argument_column, sz, 
res_data_column->get_data());
         }
 
         DCHECK(!_data_type->is_nullable());
diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h
index 02b2a7cb62..af7ff2b8a5 100644
--- a/be/src/vec/functions/in.h
+++ b/be/src/vec/functions/in.h
@@ -68,16 +68,18 @@ public:
         }
         std::shared_ptr<InState> state = std::make_shared<InState>();
         context->set_function_state(scope, state);
+        DCHECK(context->get_num_args() >= 1);
         if (context->get_arg_type(0)->type == doris::PrimitiveType::TYPE_CHAR 
||
             context->get_arg_type(0)->type == 
doris::PrimitiveType::TYPE_VARCHAR ||
             context->get_arg_type(0)->type == 
doris::PrimitiveType::TYPE_STRING) {
             // the StringValue's memory is held by FunctionContext, so we can 
use StringValueSet here directly
-            state->hybrid_set.reset(new StringValueSet());
+            
state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args()
 - 1)));
+
         } else {
-            
state->hybrid_set.reset(create_set(context->get_arg_type(0)->type));
+            state->hybrid_set.reset(create_set(context->get_arg_type(0)->type,
+                                               
(size_t)(context->get_num_args() - 1)));
         }
 
-        DCHECK(context->get_num_args() >= 1);
         for (int i = 1; i < context->get_num_args(); ++i) {
             const auto& const_column_ptr = context->get_constant_col(i);
             if (const_column_ptr != nullptr) {
@@ -119,19 +121,17 @@ public:
             if (materialized_column->is_nullable()) {
                 auto* null_col_ptr = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(
                         materialized_column);
-                auto& null_bitmap = reinterpret_cast<const 
vectorized::ColumnUInt8&>(
-                                            
null_col_ptr->get_null_map_column())
-                                            .get_data();
+                auto& null_map = reinterpret_cast<const 
vectorized::ColumnUInt8&>(
+                                         null_col_ptr->get_null_map_column())
+                                         .get_data();
                 auto* nested_col_ptr = 
null_col_ptr->get_nested_column_ptr().get();
                 auto search_hash_set = [&](auto* col_ptr) {
-                    for (size_t i = 0; i < input_rows_count; ++i) {
-                        const auto& ref_data = col_ptr->get_data_at(i);
-                        vec_res[i] =
-                                !null_bitmap[i] &&
-                                
in_state->hybrid_set->find((void*)ref_data.data, ref_data.size);
-                        if constexpr (negative) {
-                            vec_res[i] = !vec_res[i];
-                        }
+                    if constexpr (!negative) {
+                        in_state->hybrid_set->find_batch_nullable(*col_ptr, 
input_rows_count,
+                                                                  null_map, 
vec_res);
+                    } else {
+                        in_state->hybrid_set->find_batch_nullable_negative(
+                                *col_ptr, input_rows_count, null_map, vec_res);
                     }
                 };
 
@@ -146,24 +146,22 @@ public:
 
                 if (!in_state->null_in_set) {
                     for (size_t i = 0; i < input_rows_count; ++i) {
-                        vec_null_map_to[i] = null_bitmap[i];
+                        vec_null_map_to[i] = null_map[i];
                     }
                 } else {
                     for (size_t i = 0; i < input_rows_count; ++i) {
-                        vec_null_map_to[i] = null_bitmap[i] || negative == 
vec_res[i];
+                        vec_null_map_to[i] = null_map[i] || negative == 
vec_res[i];
                     }
                 }
 
             } else { // non-nullable
 
                 auto search_hash_set = [&](auto* col_ptr) {
-                    for (size_t i = 0; i < input_rows_count; ++i) {
-                        const auto& ref_data = col_ptr->get_data_at(i);
-                        vec_res[i] =
-                                
in_state->hybrid_set->find((void*)ref_data.data, ref_data.size);
-                        if constexpr (negative) {
-                            vec_res[i] = !vec_res[i];
-                        }
+                    if constexpr (!negative) {
+                        in_state->hybrid_set->find_batch(*col_ptr, 
input_rows_count, vec_res);
+                    } else {
+                        in_state->hybrid_set->find_batch_negative(*col_ptr, 
input_rows_count,
+                                                                  vec_res);
                     }
                 };
 
@@ -196,7 +194,7 @@ public:
                 }
 
                 std::unique_ptr<HybridSetBase> hybrid_set(
-                        create_set(context->get_arg_type(0)->type));
+                        create_set(context->get_arg_type(0)->type, 
set_columns.size()));
                 bool null_in_set = false;
 
                 for (const auto& set_column : set_columns) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org

Reply via email to