This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1a1b62aad8f [env](compiler) Reduce template instantiations in 
predicate_creator.h for faster compilation (#61858)
1a1b62aad8f is described below

commit 1a1b62aad8f16abf58b2bfca9e9cf8c0c1c6c99d
Author: Mryange <[email protected]>
AuthorDate: Tue Mar 31 15:14:11 2026 +0800

    [env](compiler) Reduce template instantiations in predicate_creator.h for 
faster compilation (#61858)
    
    `predicate_creator.h` was a header-only file containing two heavy
    template functions: `create_in_list_predicate<PT>` and
    `create_comparison_predicate<PT>`. Every `.cpp` file that included this
    header had to independently instantiate massive template class
    hierarchies:
    
    - **InListPredicateBase<TYPE, PT, N>**: 23 types × 2 PT × 9 N = **414
    class instantiations**, each with ~34 member functions (676 LOC class)
    - **ComparisonPredicateBase<TYPE, PT>**: 23 types × 6 PT = **132 class
    instantiations**, each with ~40 member functions (720 LOC class)
    
    This resulted in ~19,000 function instantiations **per consumer file**.
    The top 2 slowest files in the entire BE codebase (`scan_operator.cpp`
    at 143.5s and `delete_handler.cpp` at 141.0s) both included this header
    — confirmed via `-ftime-trace` profiling.
    
    ### What this PR does
    
    1. **Move template definitions from header to `.cpp`**: Replace the full
    template function bodies in `predicate_creator.h` with declarations
    only. Add explicit template instantiations in `.cpp` so the templates
    are compiled once and linked.
    
    2. **Prune heavy includes from the header**: Remove
    `in_list_predicate.h` (676 LOC), `comparison_predicate.h` (720 LOC),
    `bloom_filter_predicate.h`, `null_predicate.h`, and other transitive
    includes that are no longer needed in the header. Add forward
    declarations for `BloomFilterFuncBase` and `BitmapFilterFuncBase`.
    
    3. **Split into 4 `.cpp` files for parallel compilation**: The
    concentrated template instantiations in a single `.cpp` would create a
    new 224s bottleneck. Split by template family to enable parallel builds:
    - `predicate_creator.cpp` — bloom_filter + bitmap_filter (lightweight,
    ~28s)
    - `predicate_creator_in_list_in.cpp` —
    `create_in_list_predicate<IN_LIST>` (~61s)
    - `predicate_creator_in_list_not_in.cpp` —
    `create_in_list_predicate<NOT_IN_LIST>` (~60s)
    - `predicate_creator_comparison.cpp` —
    `create_comparison_predicate<EQ/NE/LT/GT/LE/GE>` (~46s)
    
    4. **Fix broken transitive includes**: Add `#include
    "storage/predicate/null_predicate.h"` to `delete_handler.cpp` which
    previously got it transitively through the old header.
    
    ### Compilation time results (ASAN, single-threaded measurement)
    
    | File | Before (s) | After (s) | Change |
    |------|--------:|--------:|--------|
    | scan_operator.cpp | 143.50 | 45.96 | **-68.0%** |
    | delete_handler.cpp | 141.03 | 29.62 | **-79.0%** |
    | predicate_creator.cpp | 33.05 | 28.13 | -14.9% |
    | predicate_creator_in_list_in.cpp | — | 60.85 | new |
    | predicate_creator_in_list_not_in.cpp | — | 59.84 | new |
    | predicate_creator_comparison.cpp | — | 46.23 | new |
    
    **Parallel build critical path: 143.5s → 60.9s (-57.5%)**
---
 be/src/storage/delete/delete_handler.cpp           |   1 +
 be/src/storage/predicate/predicate_creator.cpp     |   5 +
 be/src/storage/predicate/predicate_creator.h       | 252 +--------------------
 .../predicate/predicate_creator_comparison.cpp     | 153 +++++++++++++
 .../predicate/predicate_creator_in_list_in.cpp     | 166 ++++++++++++++
 .../predicate/predicate_creator_in_list_not_in.cpp | 166 ++++++++++++++
 6 files changed, 499 insertions(+), 244 deletions(-)

diff --git a/be/src/storage/delete/delete_handler.cpp 
b/be/src/storage/delete/delete_handler.cpp
index c13c17700a5..8aab3c42296 100644
--- a/be/src/storage/delete/delete_handler.cpp
+++ b/be/src/storage/delete/delete_handler.cpp
@@ -30,6 +30,7 @@
 #include "core/data_type_serde/data_type_serde.h"
 #include "storage/olap_common.h"
 #include "storage/predicate/block_column_predicate.h"
+#include "storage/predicate/null_predicate.h"
 #include "storage/predicate/predicate_creator.h"
 #include "storage/tablet/tablet_schema.h"
 #include "storage/utils.h"
diff --git a/be/src/storage/predicate/predicate_creator.cpp 
b/be/src/storage/predicate/predicate_creator.cpp
index 1c57a754e8a..6bfa938a4d2 100644
--- a/be/src/storage/predicate/predicate_creator.cpp
+++ b/be/src/storage/predicate/predicate_creator.cpp
@@ -17,6 +17,11 @@
 
 #include "storage/predicate/predicate_creator.h"
 
+#include "common/exception.h"
+#include "exprs/create_predicate_function.h"
+#include "storage/predicate/bitmap_filter_predicate.h"
+#include "storage/predicate/bloom_filter_predicate.h"
+
 namespace doris {
 
 std::shared_ptr<ColumnPredicate> create_bloom_filter_predicate(
diff --git a/be/src/storage/predicate/predicate_creator.h 
b/be/src/storage/predicate/predicate_creator.h
index 3fd5e99b1ec..54f18838359 100644
--- a/be/src/storage/predicate/predicate_creator.h
+++ b/be/src/storage/predicate/predicate_creator.h
@@ -17,271 +17,35 @@
 
 #pragma once
 
-#include <fast_float/fast_float.h>
-
-#include <charconv>
-#include <stdexcept>
+#include <memory>
 #include <string>
-#include <type_traits>
 
-#include "common/exception.h"
-#include "common/status.h"
 #include "core/data_type/data_type.h"
-#include "core/data_type/define_primitive_type.h"
 #include "core/data_type/primitive_type.h"
-#include "core/string_ref.h"
-#include "exprs/create_predicate_function.h"
-#include "exprs/function/cast/cast_parameters.h"
-#include "exprs/function/cast/cast_to_basic_number_common.h"
+#include "core/field.h"
 #include "exprs/hybrid_set.h"
-#include "storage/olap_utils.h"
-#include "storage/predicate/bloom_filter_predicate.h"
 #include "storage/predicate/column_predicate.h"
-#include "storage/predicate/comparison_predicate.h"
-#include "storage/predicate/in_list_predicate.h"
-#include "storage/predicate/null_predicate.h"
-#include "storage/tablet/tablet_schema.h"
-#include "util/date_func.h"
-#include "util/string_util.h"
 
 namespace doris {
 #include "common/compile_check_begin.h"
 
-template <PrimitiveType TYPE, PredicateType PT>
-std::shared_ptr<ColumnPredicate> create_in_list_predicate(const uint32_t cid,
-                                                          const std::string 
col_name,
-                                                          const 
std::shared_ptr<HybridSetBase>& set,
-                                                          bool is_opposite,
-                                                          size_t char_length = 
0) {
-    auto set_size = set->size();
-    if (set_size == 1) {
-        return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 2) {
-        return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 3) {
-        return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 4) {
-        return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 5) {
-        return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 6) {
-        return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == 7) {
-        return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
-        return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name, 
set, is_opposite,
-                                                               char_length);
-    } else {
-        return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE + 
1>::create_shared(
-                cid, col_name, set, is_opposite, char_length);
-    }
-}
+class BloomFilterFuncBase;
+class BitmapFilterFuncBase;
 
+// Defined in predicate_creator.cpp with explicit instantiations.
 template <PredicateType PT>
 std::shared_ptr<ColumnPredicate> create_in_list_predicate(const uint32_t cid,
                                                           const std::string 
col_name,
                                                           const DataTypePtr& 
data_type,
                                                           const 
std::shared_ptr<HybridSetBase> set,
-                                                          bool is_opposite) {
-    switch (data_type->get_primitive_type()) {
-    case TYPE_TINYINT: {
-        return create_in_list_predicate<TYPE_TINYINT, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_SMALLINT: {
-        return create_in_list_predicate<TYPE_SMALLINT, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_INT: {
-        return create_in_list_predicate<TYPE_INT, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_BIGINT: {
-        return create_in_list_predicate<TYPE_BIGINT, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_LARGEINT: {
-        return create_in_list_predicate<TYPE_LARGEINT, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_FLOAT: {
-        return create_in_list_predicate<TYPE_FLOAT, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_DOUBLE: {
-        return create_in_list_predicate<TYPE_DOUBLE, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_DECIMALV2: {
-        return create_in_list_predicate<TYPE_DECIMALV2, PT>(cid, col_name, 
set, is_opposite);
-    }
-    case TYPE_DECIMAL32: {
-        return create_in_list_predicate<TYPE_DECIMAL32, PT>(cid, col_name, 
set, is_opposite);
-    }
-    case TYPE_DECIMAL64: {
-        return create_in_list_predicate<TYPE_DECIMAL64, PT>(cid, col_name, 
set, is_opposite);
-    }
-    case TYPE_DECIMAL128I: {
-        return create_in_list_predicate<TYPE_DECIMAL128I, PT>(cid, col_name, 
set, is_opposite);
-    }
-    case TYPE_DECIMAL256: {
-        return create_in_list_predicate<TYPE_DECIMAL256, PT>(cid, col_name, 
set, is_opposite);
-    }
-    case TYPE_CHAR: {
-        return create_in_list_predicate<TYPE_CHAR, PT>(
-                cid, col_name, set, is_opposite,
-                assert_cast<const 
DataTypeString*>(remove_nullable(data_type).get())->len());
-    }
-    case TYPE_VARCHAR: {
-        return create_in_list_predicate<TYPE_VARCHAR, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_STRING: {
-        return create_in_list_predicate<TYPE_STRING, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_DATE: {
-        return create_in_list_predicate<TYPE_DATE, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_DATEV2: {
-        return create_in_list_predicate<TYPE_DATEV2, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_DATETIME: {
-        return create_in_list_predicate<TYPE_DATETIME, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_DATETIMEV2: {
-        return create_in_list_predicate<TYPE_DATETIMEV2, PT>(cid, col_name, 
set, is_opposite);
-    }
-    case TYPE_TIMESTAMPTZ: {
-        return create_in_list_predicate<TYPE_TIMESTAMPTZ, PT>(cid, col_name, 
set, is_opposite);
-    }
-    case TYPE_BOOLEAN: {
-        return create_in_list_predicate<TYPE_BOOLEAN, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_IPV4: {
-        return create_in_list_predicate<TYPE_IPV4, PT>(cid, col_name, set, 
is_opposite);
-    }
-    case TYPE_IPV6: {
-        return create_in_list_predicate<TYPE_IPV6, PT>(cid, col_name, set, 
is_opposite);
-    }
-    default:
-        throw Exception(Status::InternalError("Unsupported type {} for 
in_predicate",
-                                              
type_to_string(data_type->get_primitive_type())));
-        return nullptr;
-    }
-}
+                                                          bool is_opposite);
 
+// Defined in predicate_creator.cpp with explicit instantiations.
 template <PredicateType PT>
 std::shared_ptr<ColumnPredicate> create_comparison_predicate(const uint32_t 
cid,
                                                              const std::string 
col_name,
                                                              const 
DataTypePtr& data_type,
-                                                             const Field& 
value, bool opposite) {
-    switch (data_type->get_primitive_type()) {
-    case TYPE_TINYINT: {
-        return ComparisonPredicateBase<TYPE_TINYINT, PT>::create_shared(cid, 
col_name, value,
-                                                                        
opposite);
-    }
-    case TYPE_SMALLINT: {
-        return ComparisonPredicateBase<TYPE_SMALLINT, PT>::create_shared(cid, 
col_name, value,
-                                                                         
opposite);
-    }
-    case TYPE_INT: {
-        return ComparisonPredicateBase<TYPE_INT, PT>::create_shared(cid, 
col_name, value, opposite);
-    }
-    case TYPE_BIGINT: {
-        return ComparisonPredicateBase<TYPE_BIGINT, PT>::create_shared(cid, 
col_name, value,
-                                                                       
opposite);
-    }
-    case TYPE_LARGEINT: {
-        return ComparisonPredicateBase<TYPE_LARGEINT, PT>::create_shared(cid, 
col_name, value,
-                                                                         
opposite);
-    }
-    case TYPE_FLOAT: {
-        return ComparisonPredicateBase<TYPE_FLOAT, PT>::create_shared(cid, 
col_name, value,
-                                                                      
opposite);
-    }
-    case TYPE_DOUBLE: {
-        return ComparisonPredicateBase<TYPE_DOUBLE, PT>::create_shared(cid, 
col_name, value,
-                                                                       
opposite);
-    }
-    case TYPE_DECIMALV2: {
-        return ComparisonPredicateBase<TYPE_DECIMALV2, PT>::create_shared(cid, 
col_name, value,
-                                                                          
opposite);
-    }
-    case TYPE_DECIMAL32: {
-        return ComparisonPredicateBase<TYPE_DECIMAL32, PT>::create_shared(cid, 
col_name, value,
-                                                                          
opposite);
-    }
-    case TYPE_DECIMAL64: {
-        return ComparisonPredicateBase<TYPE_DECIMAL64, PT>::create_shared(cid, 
col_name, value,
-                                                                          
opposite);
-    }
-    case TYPE_DECIMAL128I: {
-        return ComparisonPredicateBase<TYPE_DECIMAL128I, 
PT>::create_shared(cid, col_name, value,
-                                                                            
opposite);
-    }
-    case TYPE_DECIMAL256: {
-        return ComparisonPredicateBase<TYPE_DECIMAL256, 
PT>::create_shared(cid, col_name, value,
-                                                                           
opposite);
-    }
-    case TYPE_CHAR: {
-        auto target = std::max(cast_set<size_t>(assert_cast<const 
DataTypeString*>(
-                                                        
remove_nullable(data_type).get())
-                                                        ->len()),
-                               value.template get<TYPE_CHAR>().size());
-        if (target > value.template get<TYPE_CHAR>().size()) {
-            std::string tmp(target, '\0');
-            memcpy(tmp.data(), value.template get<TYPE_CHAR>().data(),
-                   value.template get<TYPE_CHAR>().size());
-            return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
-                    cid, col_name, 
Field::create_field<TYPE_CHAR>(std::move(tmp)), opposite);
-        } else {
-            return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
-                    cid, col_name, 
Field::create_field<TYPE_CHAR>(value.template get<TYPE_CHAR>()),
-                    opposite);
-        }
-    }
-    case TYPE_VARCHAR:
-    case TYPE_STRING: {
-        return ComparisonPredicateBase<TYPE_STRING, PT>::create_shared(cid, 
col_name, value,
-                                                                       
opposite);
-    }
-    case TYPE_DATE: {
-        return ComparisonPredicateBase<TYPE_DATE, PT>::create_shared(cid, 
col_name, value,
-                                                                     opposite);
-    }
-    case TYPE_DATEV2: {
-        return ComparisonPredicateBase<TYPE_DATEV2, PT>::create_shared(cid, 
col_name, value,
-                                                                       
opposite);
-    }
-    case TYPE_DATETIME: {
-        return ComparisonPredicateBase<TYPE_DATETIME, PT>::create_shared(cid, 
col_name, value,
-                                                                         
opposite);
-    }
-    case TYPE_DATETIMEV2: {
-        return ComparisonPredicateBase<TYPE_DATETIMEV2, 
PT>::create_shared(cid, col_name, value,
-                                                                           
opposite);
-    }
-    case TYPE_TIMESTAMPTZ: {
-        return ComparisonPredicateBase<TYPE_TIMESTAMPTZ, 
PT>::create_shared(cid, col_name, value,
-                                                                            
opposite);
-    }
-    case TYPE_BOOLEAN: {
-        return ComparisonPredicateBase<TYPE_BOOLEAN, PT>::create_shared(cid, 
col_name, value,
-                                                                        
opposite);
-    }
-    case TYPE_IPV4: {
-        return ComparisonPredicateBase<TYPE_IPV4, PT>::create_shared(cid, 
col_name, value,
-                                                                     opposite);
-    }
-    case TYPE_IPV6: {
-        return ComparisonPredicateBase<TYPE_IPV6, PT>::create_shared(cid, 
col_name, value,
-                                                                     opposite);
-    }
-    default:
-        throw Exception(Status::InternalError("Unsupported type {} for 
comparison_predicate",
-                                              
type_to_string(data_type->get_primitive_type())));
-        return nullptr;
-    }
-}
+                                                             const Field& 
value, bool opposite);
 
 template <PrimitiveType TYPE>
 std::shared_ptr<HybridSetBase> build_set() {
diff --git a/be/src/storage/predicate/predicate_creator_comparison.cpp 
b/be/src/storage/predicate/predicate_creator_comparison.cpp
new file mode 100644
index 00000000000..bfec1262cfc
--- /dev/null
+++ b/be/src/storage/predicate/predicate_creator_comparison.cpp
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/data_type_string.h"
+#include "storage/predicate/comparison_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+
+template <PredicateType PT>
+std::shared_ptr<ColumnPredicate> create_comparison_predicate(const uint32_t 
cid,
+                                                             const std::string 
col_name,
+                                                             const 
DataTypePtr& data_type,
+                                                             const Field& 
value, bool opposite) {
+    switch (data_type->get_primitive_type()) {
+    case TYPE_TINYINT: {
+        return ComparisonPredicateBase<TYPE_TINYINT, PT>::create_shared(cid, 
col_name, value,
+                                                                        
opposite);
+    }
+    case TYPE_SMALLINT: {
+        return ComparisonPredicateBase<TYPE_SMALLINT, PT>::create_shared(cid, 
col_name, value,
+                                                                         
opposite);
+    }
+    case TYPE_INT: {
+        return ComparisonPredicateBase<TYPE_INT, PT>::create_shared(cid, 
col_name, value, opposite);
+    }
+    case TYPE_BIGINT: {
+        return ComparisonPredicateBase<TYPE_BIGINT, PT>::create_shared(cid, 
col_name, value,
+                                                                       
opposite);
+    }
+    case TYPE_LARGEINT: {
+        return ComparisonPredicateBase<TYPE_LARGEINT, PT>::create_shared(cid, 
col_name, value,
+                                                                         
opposite);
+    }
+    case TYPE_FLOAT: {
+        return ComparisonPredicateBase<TYPE_FLOAT, PT>::create_shared(cid, 
col_name, value,
+                                                                      
opposite);
+    }
+    case TYPE_DOUBLE: {
+        return ComparisonPredicateBase<TYPE_DOUBLE, PT>::create_shared(cid, 
col_name, value,
+                                                                       
opposite);
+    }
+    case TYPE_DECIMALV2: {
+        return ComparisonPredicateBase<TYPE_DECIMALV2, PT>::create_shared(cid, 
col_name, value,
+                                                                          
opposite);
+    }
+    case TYPE_DECIMAL32: {
+        return ComparisonPredicateBase<TYPE_DECIMAL32, PT>::create_shared(cid, 
col_name, value,
+                                                                          
opposite);
+    }
+    case TYPE_DECIMAL64: {
+        return ComparisonPredicateBase<TYPE_DECIMAL64, PT>::create_shared(cid, 
col_name, value,
+                                                                          
opposite);
+    }
+    case TYPE_DECIMAL128I: {
+        return ComparisonPredicateBase<TYPE_DECIMAL128I, 
PT>::create_shared(cid, col_name, value,
+                                                                            
opposite);
+    }
+    case TYPE_DECIMAL256: {
+        return ComparisonPredicateBase<TYPE_DECIMAL256, 
PT>::create_shared(cid, col_name, value,
+                                                                           
opposite);
+    }
+    case TYPE_CHAR: {
+        auto target = std::max(cast_set<size_t>(assert_cast<const 
DataTypeString*>(
+                                                        
remove_nullable(data_type).get())
+                                                        ->len()),
+                               value.template get<TYPE_CHAR>().size());
+        if (target > value.template get<TYPE_CHAR>().size()) {
+            std::string tmp(target, '\0');
+            memcpy(tmp.data(), value.template get<TYPE_CHAR>().data(),
+                   value.template get<TYPE_CHAR>().size());
+            return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
+                    cid, col_name, 
Field::create_field<TYPE_CHAR>(std::move(tmp)), opposite);
+        } else {
+            return ComparisonPredicateBase<TYPE_CHAR, PT>::create_shared(
+                    cid, col_name, 
Field::create_field<TYPE_CHAR>(value.template get<TYPE_CHAR>()),
+                    opposite);
+        }
+    }
+    case TYPE_VARCHAR:
+    case TYPE_STRING: {
+        return ComparisonPredicateBase<TYPE_STRING, PT>::create_shared(cid, 
col_name, value,
+                                                                       
opposite);
+    }
+    case TYPE_DATE: {
+        return ComparisonPredicateBase<TYPE_DATE, PT>::create_shared(cid, 
col_name, value,
+                                                                     opposite);
+    }
+    case TYPE_DATEV2: {
+        return ComparisonPredicateBase<TYPE_DATEV2, PT>::create_shared(cid, 
col_name, value,
+                                                                       
opposite);
+    }
+    case TYPE_DATETIME: {
+        return ComparisonPredicateBase<TYPE_DATETIME, PT>::create_shared(cid, 
col_name, value,
+                                                                         
opposite);
+    }
+    case TYPE_DATETIMEV2: {
+        return ComparisonPredicateBase<TYPE_DATETIMEV2, 
PT>::create_shared(cid, col_name, value,
+                                                                           
opposite);
+    }
+    case TYPE_TIMESTAMPTZ: {
+        return ComparisonPredicateBase<TYPE_TIMESTAMPTZ, 
PT>::create_shared(cid, col_name, value,
+                                                                            
opposite);
+    }
+    case TYPE_BOOLEAN: {
+        return ComparisonPredicateBase<TYPE_BOOLEAN, PT>::create_shared(cid, 
col_name, value,
+                                                                        
opposite);
+    }
+    case TYPE_IPV4: {
+        return ComparisonPredicateBase<TYPE_IPV4, PT>::create_shared(cid, 
col_name, value,
+                                                                     opposite);
+    }
+    case TYPE_IPV6: {
+        return ComparisonPredicateBase<TYPE_IPV6, PT>::create_shared(cid, 
col_name, value,
+                                                                     opposite);
+    }
+    default:
+        throw Exception(Status::InternalError("Unsupported type {} for 
comparison_predicate",
+                                              
type_to_string(data_type->get_primitive_type())));
+        return nullptr;
+    }
+}
+
+template std::shared_ptr<ColumnPredicate> 
create_comparison_predicate<PredicateType::EQ>(
+        const uint32_t, const std::string, const DataTypePtr&, const Field&, 
bool);
+template std::shared_ptr<ColumnPredicate> 
create_comparison_predicate<PredicateType::NE>(
+        const uint32_t, const std::string, const DataTypePtr&, const Field&, 
bool);
+template std::shared_ptr<ColumnPredicate> 
create_comparison_predicate<PredicateType::LT>(
+        const uint32_t, const std::string, const DataTypePtr&, const Field&, 
bool);
+template std::shared_ptr<ColumnPredicate> 
create_comparison_predicate<PredicateType::GT>(
+        const uint32_t, const std::string, const DataTypePtr&, const Field&, 
bool);
+template std::shared_ptr<ColumnPredicate> 
create_comparison_predicate<PredicateType::LE>(
+        const uint32_t, const std::string, const DataTypePtr&, const Field&, 
bool);
+template std::shared_ptr<ColumnPredicate> 
create_comparison_predicate<PredicateType::GE>(
+        const uint32_t, const std::string, const DataTypePtr&, const Field&, 
bool);
+
+} // namespace doris
diff --git a/be/src/storage/predicate/predicate_creator_in_list_in.cpp 
b/be/src/storage/predicate/predicate_creator_in_list_in.cpp
new file mode 100644
index 00000000000..7b40a9adef6
--- /dev/null
+++ b/be/src/storage/predicate/predicate_creator_in_list_in.cpp
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/data_type_string.h"
+#include "storage/predicate/in_list_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+
+template <PrimitiveType TYPE, PredicateType PT>
+static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
+        const uint32_t cid, const std::string col_name, const 
std::shared_ptr<HybridSetBase>& set,
+        bool is_opposite, size_t char_length = 0) {
+    auto set_size = set->size();
+    if (set_size == 1) {
+        return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 2) {
+        return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 3) {
+        return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 4) {
+        return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 5) {
+        return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 6) {
+        return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 7) {
+        return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+        return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else {
+        return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE + 
1>::create_shared(
+                cid, col_name, set, is_opposite, char_length);
+    }
+}
+
+template <>
+std::shared_ptr<ColumnPredicate> 
create_in_list_predicate<PredicateType::IN_LIST>(
+        const uint32_t cid, const std::string col_name, const DataTypePtr& 
data_type,
+        const std::shared_ptr<HybridSetBase> set, bool is_opposite) {
+    switch (data_type->get_primitive_type()) {
+    case TYPE_TINYINT: {
+        return create_in_list_predicate_impl<TYPE_TINYINT, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_SMALLINT: {
+        return create_in_list_predicate_impl<TYPE_SMALLINT, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_INT: {
+        return create_in_list_predicate_impl<TYPE_INT, 
PredicateType::IN_LIST>(cid, col_name, set,
+                                                                               
is_opposite);
+    }
+    case TYPE_BIGINT: {
+        return create_in_list_predicate_impl<TYPE_BIGINT, 
PredicateType::IN_LIST>(cid, col_name,
+                                                                               
   set, is_opposite);
+    }
+    case TYPE_LARGEINT: {
+        return create_in_list_predicate_impl<TYPE_LARGEINT, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_FLOAT: {
+        return create_in_list_predicate_impl<TYPE_FLOAT, 
PredicateType::IN_LIST>(cid, col_name, set,
+                                                                               
  is_opposite);
+    }
+    case TYPE_DOUBLE: {
+        return create_in_list_predicate_impl<TYPE_DOUBLE, 
PredicateType::IN_LIST>(cid, col_name,
+                                                                               
   set, is_opposite);
+    }
+    case TYPE_DECIMALV2: {
+        return create_in_list_predicate_impl<TYPE_DECIMALV2, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL32: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL32, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL64: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL64, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL128I: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL128I, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL256: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL256, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_CHAR: {
+        return create_in_list_predicate_impl<TYPE_CHAR, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite,
+                assert_cast<const 
DataTypeString*>(remove_nullable(data_type).get())->len());
+    }
+    case TYPE_VARCHAR: {
+        return create_in_list_predicate_impl<TYPE_VARCHAR, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_STRING: {
+        return create_in_list_predicate_impl<TYPE_STRING, 
PredicateType::IN_LIST>(cid, col_name,
+                                                                               
   set, is_opposite);
+    }
+    case TYPE_DATE: {
+        return create_in_list_predicate_impl<TYPE_DATE, 
PredicateType::IN_LIST>(cid, col_name, set,
+                                                                               
 is_opposite);
+    }
+    case TYPE_DATEV2: {
+        return create_in_list_predicate_impl<TYPE_DATEV2, 
PredicateType::IN_LIST>(cid, col_name,
+                                                                               
   set, is_opposite);
+    }
+    case TYPE_DATETIME: {
+        return create_in_list_predicate_impl<TYPE_DATETIME, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DATETIMEV2: {
+        return create_in_list_predicate_impl<TYPE_DATETIMEV2, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_TIMESTAMPTZ: {
+        return create_in_list_predicate_impl<TYPE_TIMESTAMPTZ, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_BOOLEAN: {
+        return create_in_list_predicate_impl<TYPE_BOOLEAN, 
PredicateType::IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_IPV4: {
+        return create_in_list_predicate_impl<TYPE_IPV4, 
PredicateType::IN_LIST>(cid, col_name, set,
+                                                                               
 is_opposite);
+    }
+    case TYPE_IPV6: {
+        return create_in_list_predicate_impl<TYPE_IPV6, 
PredicateType::IN_LIST>(cid, col_name, set,
+                                                                               
 is_opposite);
+    }
+    default:
+        throw Exception(Status::InternalError("Unsupported type {} for 
in_predicate",
+                                              
type_to_string(data_type->get_primitive_type())));
+        return nullptr;
+    }
+}
+
+} // namespace doris
diff --git a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp 
b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
new file mode 100644
index 00000000000..be28f2fabc1
--- /dev/null
+++ b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/data_type_string.h"
+#include "storage/predicate/in_list_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+
+template <PrimitiveType TYPE, PredicateType PT>
+static std::shared_ptr<ColumnPredicate> create_in_list_predicate_impl(
+        const uint32_t cid, const std::string col_name, const 
std::shared_ptr<HybridSetBase>& set,
+        bool is_opposite, size_t char_length = 0) {
+    auto set_size = set->size();
+    if (set_size == 1) {
+        return InListPredicateBase<TYPE, PT, 1>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 2) {
+        return InListPredicateBase<TYPE, PT, 2>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 3) {
+        return InListPredicateBase<TYPE, PT, 3>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 4) {
+        return InListPredicateBase<TYPE, PT, 4>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 5) {
+        return InListPredicateBase<TYPE, PT, 5>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 6) {
+        return InListPredicateBase<TYPE, PT, 6>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == 7) {
+        return InListPredicateBase<TYPE, PT, 7>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else if (set_size == FIXED_CONTAINER_MAX_SIZE) {
+        return InListPredicateBase<TYPE, PT, 8>::create_shared(cid, col_name, 
set, is_opposite,
+                                                               char_length);
+    } else {
+        return InListPredicateBase<TYPE, PT, FIXED_CONTAINER_MAX_SIZE + 
1>::create_shared(
+                cid, col_name, set, is_opposite, char_length);
+    }
+}
+
+template <>
+std::shared_ptr<ColumnPredicate> 
create_in_list_predicate<PredicateType::NOT_IN_LIST>(
+        const uint32_t cid, const std::string col_name, const DataTypePtr& 
data_type,
+        const std::shared_ptr<HybridSetBase> set, bool is_opposite) {
+    switch (data_type->get_primitive_type()) {
+    case TYPE_TINYINT: {
+        return create_in_list_predicate_impl<TYPE_TINYINT, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_SMALLINT: {
+        return create_in_list_predicate_impl<TYPE_SMALLINT, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_INT: {
+        return create_in_list_predicate_impl<TYPE_INT, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_BIGINT: {
+        return create_in_list_predicate_impl<TYPE_BIGINT, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_LARGEINT: {
+        return create_in_list_predicate_impl<TYPE_LARGEINT, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_FLOAT: {
+        return create_in_list_predicate_impl<TYPE_FLOAT, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DOUBLE: {
+        return create_in_list_predicate_impl<TYPE_DOUBLE, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMALV2: {
+        return create_in_list_predicate_impl<TYPE_DECIMALV2, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL32: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL32, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL64: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL64, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL128I: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL128I, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DECIMAL256: {
+        return create_in_list_predicate_impl<TYPE_DECIMAL256, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_CHAR: {
+        return create_in_list_predicate_impl<TYPE_CHAR, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite,
+                assert_cast<const 
DataTypeString*>(remove_nullable(data_type).get())->len());
+    }
+    case TYPE_VARCHAR: {
+        return create_in_list_predicate_impl<TYPE_VARCHAR, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_STRING: {
+        return create_in_list_predicate_impl<TYPE_STRING, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DATE: {
+        return create_in_list_predicate_impl<TYPE_DATE, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DATEV2: {
+        return create_in_list_predicate_impl<TYPE_DATEV2, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DATETIME: {
+        return create_in_list_predicate_impl<TYPE_DATETIME, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_DATETIMEV2: {
+        return create_in_list_predicate_impl<TYPE_DATETIMEV2, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_TIMESTAMPTZ: {
+        return create_in_list_predicate_impl<TYPE_TIMESTAMPTZ, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_BOOLEAN: {
+        return create_in_list_predicate_impl<TYPE_BOOLEAN, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_IPV4: {
+        return create_in_list_predicate_impl<TYPE_IPV4, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    case TYPE_IPV6: {
+        return create_in_list_predicate_impl<TYPE_IPV6, 
PredicateType::NOT_IN_LIST>(
+                cid, col_name, set, is_opposite);
+    }
+    default:
+        throw Exception(Status::InternalError("Unsupported type {} for 
in_predicate",
+                                              
type_to_string(data_type->get_primitive_type())));
+        return nullptr;
+    }
+}
+
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to