This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 0306f2bbfb2 branch-3.1: [enhance](orc) Optimize ORC Predicate Pushdown 
for OR-connected Predicate #43255 #44615 #45104 #47506 #47625 #49088 #49835 
#49927 (#52192)
0306f2bbfb2 is described below

commit 0306f2bbfb2f3a1f30837e8654823675500120e2
Author: Socrates <[email protected]>
AuthorDate: Fri Jun 27 11:22:35 2025 +0800

    branch-3.1: [enhance](orc) Optimize ORC Predicate Pushdown for OR-connected 
Predicate #43255 #44615 #45104 #47506 #47625 #49088 #49835 #49927 (#52192)
    
    picks:
    #43255
    #44615
    #45104
    #47506
    #47625
    #49088
    #49835
    #49927
    
    ---------
    
    Co-authored-by: Qi Chen <[email protected]>
---
 be/src/runtime/exec_env.h                          |   1 +
 be/src/vec/exec/format/orc/vorc_reader.cpp         | 539 ++++++++++++++-------
 be/src/vec/exec/format/orc/vorc_reader.h           |  45 +-
 be/test/exec/test_data/orc_scanner/orders.orc      | Bin 0 -> 1293 bytes
 be/test/testutil/desc_tbl_builder.cpp              |  29 +-
 be/test/testutil/desc_tbl_builder.h                |  17 +-
 .../exec/orc/orc_convert_to_orc_literal_test.cpp   | 345 ++++++++++++-
 be/test/vec/exec/orc_reader_test.cpp               | 161 ++++++
 .../orc_predicate/orc_predicate_table.hql          |  22 +
 .../java/org/apache/doris/qe/SessionVariable.java  |  19 +
 gensrc/thrift/PaloInternalService.thrift           |   1 +
 .../data/external_table_p0/hive/test_hive_orc.out  | Bin 90753 -> 92747 bytes
 .../hive/test_hive_orc_predicate.out               | Bin 2463 -> 2553 bytes
 .../external_table_p0/hive/test_hive_orc.groovy    |  34 ++
 .../hive/test_hive_orc_predicate.groovy            |  12 +
 15 files changed, 1011 insertions(+), 214 deletions(-)

diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h
index c5f9f77a783..f3255be9291 100644
--- a/be/src/runtime/exec_env.h
+++ b/be/src/runtime/exec_env.h
@@ -322,6 +322,7 @@ public:
     static void set_tracking_memory(bool tracking_memory) {
         _s_tracking_memory.store(tracking_memory, std::memory_order_release);
     }
+    void set_orc_memory_pool(orc::MemoryPool* pool) { _orc_memory_pool = pool; 
}
     void set_non_block_close_thread_pool(std::unique_ptr<ThreadPool>&& pool) {
         _non_block_close_thread_pool = std::move(pool);
     }
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 911620a6100..b53ce3a0a50 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -18,13 +18,14 @@
 #include "vorc_reader.h"
 
 #include <cctz/civil_time_detail.h>
-#include <ctype.h>
 #include <gen_cpp/Metrics_types.h>
+#include <gen_cpp/Opcodes_types.h>
 #include <gen_cpp/PlanNodes_types.h>
 #include <gen_cpp/Types_types.h>
 #include <glog/logging.h>
 
 #include <algorithm>
+#include <cctype>
 // IWYU pragma: no_include <bits/chrono.h>
 #include <chrono> // IWYU pragma: keep
 #include <exception>
@@ -33,12 +34,11 @@
 #include <memory>
 #include <ostream>
 #include <tuple>
-#include <variant>
+#include <utility>
 
 #include "cctz/civil_time.h"
 #include "cctz/time_zone.h"
 #include "common/exception.h"
-#include "exec/olap_utils.h"
 #include "exprs/create_predicate_function.h"
 #include "exprs/hybrid_set.h"
 #include "gutil/strings/substitute.h"
@@ -55,6 +55,7 @@
 #include "runtime/descriptors.h"
 #include "runtime/primitive_type.h"
 #include "runtime/thread_context.h"
+#include "util/runtime_profile.h"
 #include "util/slice.h"
 #include "util/timezone_utils.h"
 #include "vec/columns/column.h"
@@ -72,15 +73,13 @@
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_struct.h"
 #include "vec/exec/format/orc/orc_file_reader.h"
-#include "vec/exec/format/orc/orc_memory_pool.h"
 #include "vec/exec/format/table/transactional_hive_common.h"
 #include "vec/exprs/vbloom_predicate.h"
 #include "vec/exprs/vdirect_in_predicate.h"
 #include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr_fwd.h"
 #include "vec/exprs/vin_predicate.h"
-#include "vec/exprs/vliteral.h"
 #include "vec/exprs/vruntimefilter_wrapper.h"
-#include "vec/exprs/vslot_ref.h"
 #include "vec/runtime/vdatetime_value.h"
 
 namespace doris {
@@ -265,6 +264,10 @@ void OrcReader::_init_profile() {
                 ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeNullMapTime", 
orc_profile, 1);
         _orc_profile.filter_block_time =
                 ADD_CHILD_TIMER_WITH_LEVEL(_profile, "FilterBlockTime", 
orc_profile, 1);
+        _orc_profile.selected_row_group_count =
+                ADD_COUNTER_WITH_LEVEL(_profile, "SelectedRowGroupCount", 
TUnit::UNIT, 1);
+        _orc_profile.evaluated_row_group_count =
+                ADD_COUNTER_WITH_LEVEL(_profile, "EvaluatedRowGroupCount", 
TUnit::UNIT, 1);
     }
 }
 
@@ -288,6 +291,7 @@ Status OrcReader::_create_file_reader() {
     try {
         orc::ReaderOptions options;
         options.setMemoryPool(*ExecEnv::GetInstance()->orc_memory_pool());
+        options.setReaderMetrics(&_reader_metrics);
         _reader = orc::createReader(
                 
std::unique_ptr<ORCFileInputStream>(_file_input_stream.release()), options);
     } catch (std::exception& e) {
@@ -376,7 +380,7 @@ Status OrcReader::_init_read_columns() {
     std::vector<std::string> orc_cols;
     std::vector<std::string> orc_cols_lower_case;
     bool is_hive1_orc = false;
-    _init_orc_cols(root_type, orc_cols, orc_cols_lower_case, _type_map, 
&is_hive1_orc);
+    _init_orc_cols(root_type, orc_cols, orc_cols_lower_case, _type_map, 
&is_hive1_orc, false);
 
     // In old version slot_name_to_schema_pos may not be set in _scan_params
     // TODO, should be removed in 2.2 or later
@@ -432,7 +436,7 @@ Status OrcReader::_init_read_columns() {
 void OrcReader::_init_orc_cols(const orc::Type& type, 
std::vector<std::string>& orc_cols,
                                std::vector<std::string>& orc_cols_lower_case,
                                std::unordered_map<std::string, const 
orc::Type*>& type_map,
-                               bool* is_hive1_orc) {
+                               bool* is_hive1_orc, bool 
should_add_acid_prefix) const {
     bool hive1_orc = true;
     for (int i = 0; i < type.getSubtypeCount(); ++i) {
         orc_cols.emplace_back(type.getFieldName(i));
@@ -442,11 +446,17 @@ void OrcReader::_init_orc_cols(const orc::Type& type, 
std::vector<std::string>&
         }
         orc_cols_lower_case.emplace_back(std::move(filed_name_lower_case));
         auto file_name = type.getFieldName(i);
+        if (should_add_acid_prefix) {
+            file_name = fmt::format(
+                    "{}.{}", 
TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
+                    file_name);
+        }
         type_map.emplace(std::move(file_name), type.getSubtype(i));
         if (_is_acid) {
             const orc::Type* sub_type = type.getSubtype(i);
             if (sub_type->getKind() == orc::TypeKind::STRUCT) {
-                _init_orc_cols(*sub_type, orc_cols, orc_cols_lower_case, 
type_map, is_hive1_orc);
+                _init_orc_cols(*sub_type, orc_cols, orc_cols_lower_case, 
type_map, is_hive1_orc,
+                               true);
             }
         }
     }
@@ -500,8 +510,10 @@ static std::unordered_map<orc::TypeKind, 
orc::PredicateDataType> TYPEKIND_TO_PRE
         {orc::TypeKind::BOOLEAN, orc::PredicateDataType::BOOLEAN}};
 
 template <PrimitiveType primitive_type>
-std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type, 
const void* value,
-                                                      int precision, int 
scale) {
+std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type,
+                                                      StringRef& literal_data, 
int precision,
+                                                      int scale) {
+    const auto* value = literal_data.data;
     try {
         switch (type->getKind()) {
         case orc::TypeKind::BOOLEAN: {
@@ -549,8 +561,7 @@ std::tuple<bool, orc::Literal> convert_to_orc_literal(const 
orc::Type* type, con
         case orc::TypeKind::VARCHAR: {
             if (primitive_type == TYPE_STRING || primitive_type == TYPE_CHAR ||
                 primitive_type == TYPE_VARCHAR) {
-                StringRef* string_value = (StringRef*)value;
-                return std::make_tuple(true, orc::Literal(string_value->data, 
string_value->size));
+                return std::make_tuple(true, orc::Literal(literal_data.data, 
literal_data.size));
             }
             return std::make_tuple(false, orc::Literal(false));
         }
@@ -629,192 +640,358 @@ std::tuple<bool, orc::Literal> 
convert_to_orc_literal(const orc::Type* type, con
     }
 }
 
-template <PrimitiveType primitive_type>
-std::vector<OrcPredicate> value_range_to_predicate(
-        const ColumnValueRange<primitive_type>& col_val_range, const 
orc::Type* type) {
-    std::vector<OrcPredicate> predicates;
+std::tuple<bool, orc::Literal, orc::PredicateDataType> 
OrcReader::_make_orc_literal(
+        const VSlotRef* slot_ref, const VLiteral* literal) {
+    DCHECK(_col_name_to_file_col_name.contains(slot_ref->expr_name()));
+    auto file_col_name = _col_name_to_file_col_name[slot_ref->expr_name()];
+    if (!_type_map.contains(file_col_name)) {
+        LOG(WARNING) << "Column " << slot_ref->expr_name() << " not found in 
_type_map";
+        return std::make_tuple(false, orc::Literal(false), 
orc::PredicateDataType::LONG);
+    }
+    DCHECK(_type_map.contains(file_col_name));
+    const auto* orc_type = _type_map[file_col_name];
+    if (!TYPEKIND_TO_PREDICATE_TYPE.contains(orc_type->getKind())) {
+        LOG(WARNING) << "Unsupported Push Down Orc Type [TypeKind=" << 
orc_type->getKind() << "]";
+        return std::make_tuple(false, orc::Literal(false), 
orc::PredicateDataType::LONG);
+    }
+    const auto predicate_type = 
TYPEKIND_TO_PREDICATE_TYPE[orc_type->getKind()];
+    if (literal == nullptr) {
+        // only get the predicate_type
+        return std::make_tuple(true, orc::Literal(true), predicate_type);
+    }
+    // this only happens when the literals of in_predicate contains null 
value, like in (1, null)
+    if (literal->get_column_ptr()->is_null_at(0)) {
+        return std::make_tuple(false, orc::Literal(false), predicate_type);
+    }
+    auto literal_data = literal->get_column_ptr()->get_data_at(0);
+    auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()];
+    auto slot_type = slot->type();
+    switch (slot_type.type) {
+#define M(NAME)                                                                
\
+    case TYPE_##NAME: {                                                        
\
+        auto [valid, orc_literal] = convert_to_orc_literal<TYPE_##NAME>(       
\
+                orc_type, literal_data, slot_type.precision, slot_type.scale); 
\
+        return std::make_tuple(valid, orc_literal, predicate_type);            
\
+    }
+#define APPLY_FOR_PRIMITIVE_TYPE(M) \
+    M(TINYINT)                      \
+    M(SMALLINT)                     \
+    M(INT)                          \
+    M(BIGINT)                       \
+    M(LARGEINT)                     \
+    M(DATE)                         \
+    M(DATETIME)                     \
+    M(DATEV2)                       \
+    M(DATETIMEV2)                   \
+    M(VARCHAR)                      \
+    M(STRING)                       \
+    M(HLL)                          \
+    M(DECIMAL32)                    \
+    M(DECIMAL64)                    \
+    M(DECIMAL128I)                  \
+    M(DECIMAL256)                   \
+    M(DECIMALV2)                    \
+    M(BOOLEAN)                      \
+    M(IPV4)                         \
+    M(IPV6)
+        APPLY_FOR_PRIMITIVE_TYPE(M)
+#undef M
+    default: {
+        VLOG_CRITICAL << "Unsupported Convert Orc Literal [ColName=" << 
slot->col_name() << "]";
+        return std::make_tuple(false, orc::Literal(false), predicate_type);
+    }
+    }
+}
 
-    PrimitiveType src_type = OrcReader::convert_to_doris_type(type).type;
-    if (src_type != primitive_type) {
-        if (!(is_string_type(src_type) && is_string_type(primitive_type))) {
-            // not support schema change
-            return predicates;
-        }
+// check if the slot of expr can be pushed down to orc reader and make orc 
predicate type
+bool OrcReader::_check_slot_can_push_down(const VExprSPtr& expr) {
+    if (!expr->children()[0]->is_slot_ref()) {
+        return false;
+    }
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    // check if the slot exists in orc file and not partition column
+    if (!_col_name_to_file_col_name.contains(slot_ref->expr_name()) ||
+        
_lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name())) {
+        return false;
     }
+    auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr);
+    if (valid) {
+        _vslot_ref_to_orc_predicate_data_type[slot_ref] = predicate_type;
+    }
+    return valid;
+}
 
-    orc::PredicateDataType predicate_data_type;
-    auto type_it = TYPEKIND_TO_PREDICATE_TYPE.find(type->getKind());
-    if (type_it == TYPEKIND_TO_PREDICATE_TYPE.end()) {
-        // Unsupported type
-        return predicates;
-    } else {
-        predicate_data_type = type_it->second;
+// check if the literal of expr can be pushed down to orc reader and make orc 
literal
+bool OrcReader::_check_literal_can_push_down(const VExprSPtr& expr, uint16_t 
child_id) {
+    if (!expr->children()[child_id]->is_literal()) {
+        return false;
+    }
+    // the slot has been checked in _check_slot_can_push_down before calling 
this function
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[child_id].get());
+    auto [valid, orc_literal, _] = _make_orc_literal(slot_ref, literal);
+    if (valid) {
+        _vliteral_to_orc_literal.insert(std::make_pair(literal, orc_literal));
     }
+    return valid;
+}
 
-    if (col_val_range.is_fixed_value_range()) {
-        OrcPredicate in_predicate;
-        in_predicate.col_name = col_val_range.column_name();
-        in_predicate.data_type = predicate_data_type;
-        in_predicate.op = SQLFilterOp::FILTER_IN;
-        for (const auto& value : col_val_range.get_fixed_value_set()) {
-            auto [valid, literal] = convert_to_orc_literal<primitive_type>(
-                    type, &value, col_val_range.precision(), 
col_val_range.scale());
-            if (valid) {
-                in_predicate.literals.push_back(literal);
-            }
-        }
-        if (!in_predicate.literals.empty()) {
-            predicates.emplace_back(in_predicate);
-        }
-        return predicates;
+// check if there are rest children of expr can be pushed down to orc reader
+bool OrcReader::_check_rest_children_can_push_down(const VExprSPtr& expr) {
+    if (expr->children().size() < 2) {
+        return false;
     }
 
-    const auto& high_value = col_val_range.get_range_max_value();
-    const auto& low_value = col_val_range.get_range_min_value();
-    const auto& high_op = col_val_range.get_range_high_op();
-    const auto& low_op = col_val_range.get_range_low_op();
+    bool at_least_one_child_can_push_down = false;
+    for (size_t i = 1; i < expr->children().size(); ++i) {
+        if (_check_literal_can_push_down(expr, i)) {
+            at_least_one_child_can_push_down = true;
+        }
+    }
+    return at_least_one_child_can_push_down;
+}
 
-    // orc can only push down is_null. When col_value_range._contain_null = 
true, only indicating that
-    // value can be null, not equals null, so ignore _contain_null in 
col_value_range
-    if (col_val_range.is_high_value_maximum() && high_op == 
SQLFilterOp::FILTER_LESS_OR_EQUAL &&
-        col_val_range.is_low_value_mininum() && low_op == 
SQLFilterOp::FILTER_LARGER_OR_EQUAL) {
-        return predicates;
+// check if the expr can be pushed down to orc reader
+bool OrcReader::_check_expr_can_push_down(const VExprSPtr& expr) {
+    if (expr == nullptr) {
+        return false;
     }
 
-    if (low_value < high_value) {
-        if (!col_val_range.is_low_value_mininum() ||
-            SQLFilterOp::FILTER_LARGER_OR_EQUAL != low_op) {
-            auto [valid, low_literal] = convert_to_orc_literal<primitive_type>(
-                    type, &low_value, col_val_range.precision(), 
col_val_range.scale());
-            if (valid) {
-                OrcPredicate low_predicate;
-                low_predicate.col_name = col_val_range.column_name();
-                low_predicate.data_type = predicate_data_type;
-                low_predicate.op = low_op;
-                low_predicate.literals.emplace_back(low_literal);
-                predicates.emplace_back(low_predicate);
-            }
-        }
-        if (!col_val_range.is_high_value_maximum() ||
-            SQLFilterOp::FILTER_LESS_OR_EQUAL != high_op) {
-            auto [valid, high_literal] = 
convert_to_orc_literal<primitive_type>(
-                    type, &high_value, col_val_range.precision(), 
col_val_range.scale());
-            if (valid) {
-                OrcPredicate high_predicate;
-                high_predicate.col_name = col_val_range.column_name();
-                high_predicate.data_type = predicate_data_type;
-                high_predicate.op = high_op;
-                high_predicate.literals.emplace_back(high_literal);
-                predicates.emplace_back(high_predicate);
-            }
+    switch (expr->op()) {
+    case TExprOpcode::COMPOUND_AND:
+        // at least one child can be pushed down
+        return std::ranges::any_of(expr->children(), [this](const auto& child) 
{
+            return _check_expr_can_push_down(child);
+        });
+    case TExprOpcode::COMPOUND_OR:
+        // all children must be pushed down
+        return std::ranges::all_of(expr->children(), [this](const auto& child) 
{
+            return _check_expr_can_push_down(child);
+        });
+    case TExprOpcode::COMPOUND_NOT:
+        DCHECK_EQ(expr->children().size(), 1);
+        return _check_expr_can_push_down(expr->children()[0]);
+
+    case TExprOpcode::GE:
+    case TExprOpcode::GT:
+    case TExprOpcode::LE:
+    case TExprOpcode::LT:
+    case TExprOpcode::EQ:
+    case TExprOpcode::NE:
+    case TExprOpcode::FILTER_IN:
+    case TExprOpcode::FILTER_NOT_IN:
+        // can't push down if expr is null aware predicate
+        return expr->node_type() != TExprNodeType::NULL_AWARE_BINARY_PRED &&
+               expr->node_type() != TExprNodeType::NULL_AWARE_IN_PRED &&
+               _check_slot_can_push_down(expr) && 
_check_rest_children_can_push_down(expr);
+
+    case TExprOpcode::INVALID_OPCODE:
+        if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
+            auto fn_name = expr->fn().name.function_name;
+            // only support is_null_pred and is_not_null_pred
+            if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") {
+                return _check_slot_can_push_down(expr);
+            }
+            VLOG_CRITICAL << "Unsupported function [funciton=" << fn_name << 
"]";
         }
+        return false;
+    default:
+        VLOG_CRITICAL << "Unsupported Opcode [OpCode=" << expr->op() << "]";
+        return false;
+    }
+}
+
+void OrcReader::_build_less_than(const VExprSPtr& expr,
+                                 std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() == 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    DCHECK(expr->children()[1]->is_literal());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
+    DCHECK(_vliteral_to_orc_literal.contains(literal));
+    auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
+    builder->lessThan(slot_ref->expr_name(), predicate_type, orc_literal);
+}
+
+void OrcReader::_build_less_than_equals(const VExprSPtr& expr,
+                                        
std::unique_ptr<orc::SearchArgumentBuilder>& builder) {
+    DCHECK(expr->children().size() == 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    DCHECK(expr->children()[1]->is_literal());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
+    DCHECK(_vliteral_to_orc_literal.contains(literal));
+    auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
+    builder->lessThanEquals(slot_ref->expr_name(), predicate_type, 
orc_literal);
+}
+
+void OrcReader::_build_equals(const VExprSPtr& expr,
+                              std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() == 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    DCHECK(expr->children()[1]->is_literal());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    const auto* literal = static_cast<const 
VLiteral*>(expr->children()[1].get());
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
+    DCHECK(_vliteral_to_orc_literal.contains(literal));
+    auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
+    builder->equals(slot_ref->expr_name(), predicate_type, orc_literal);
+}
+
+void OrcReader::_build_filter_in(const VExprSPtr& expr,
+                                 std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() >= 2);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    std::vector<orc::Literal> literals;
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    orc::PredicateDataType predicate_type = 
_vslot_ref_to_orc_predicate_data_type[slot_ref];
+    for (size_t i = 1; i < expr->children().size(); ++i) {
+        DCHECK(expr->children()[i]->is_literal());
+        const auto* literal = static_cast<const 
VLiteral*>(expr->children()[i].get());
+        if (_vliteral_to_orc_literal.contains(literal)) {
+            auto orc_literal = _vliteral_to_orc_literal.find(literal)->second;
+            literals.emplace_back(orc_literal);
+        }
+    }
+    DCHECK(!literals.empty());
+    if (literals.size() == 1) {
+        builder->equals(slot_ref->expr_name(), predicate_type, literals[0]);
+    } else {
+        builder->in(slot_ref->expr_name(), predicate_type, literals);
     }
-    return predicates;
 }
 
-bool static build_search_argument(std::vector<OrcPredicate>& predicates, int 
index,
-                                  std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
-    if (index >= predicates.size()) {
+void OrcReader::_build_is_null(const VExprSPtr& expr,
+                               std::unique_ptr<orc::SearchArgumentBuilder>& 
builder) {
+    DCHECK(expr->children().size() == 1);
+    DCHECK(expr->children()[0]->is_slot_ref());
+    const auto* slot_ref = static_cast<const 
VSlotRef*>(expr->children()[0].get());
+    DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref));
+    auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref];
+    builder->isNull(slot_ref->expr_name(), predicate_type);
+}
+
+bool OrcReader::_build_search_argument(const VExprSPtr& expr,
+                                       
std::unique_ptr<orc::SearchArgumentBuilder>& builder) {
+    // OPTIMIZE: check expr only once
+    if (!_check_expr_can_push_down(expr)) {
         return false;
     }
-    if (index < predicates.size() - 1) {
+    switch (expr->op()) {
+    case TExprOpcode::COMPOUND_AND: {
         builder->startAnd();
+        bool at_least_one_can_push_down = false;
+        for (const auto& child : expr->children()) {
+            if (_build_search_argument(child, builder)) {
+                at_least_one_can_push_down = true;
+            }
+        }
+        DCHECK(at_least_one_can_push_down);
+        builder->end();
+        break;
     }
-    OrcPredicate& predicate = predicates[index];
-    switch (predicate.op) {
-    case SQLFilterOp::FILTER_IN: {
-        if (predicate.literals.size() == 1) {
-            builder->equals(predicate.col_name, predicate.data_type, 
predicate.literals[0]);
-        } else {
-            builder->in(predicate.col_name, predicate.data_type, 
predicate.literals);
+    case TExprOpcode::COMPOUND_OR: {
+        builder->startOr();
+        bool all_can_push_down = true;
+        for (const auto& child : expr->children()) {
+            if (!_build_search_argument(child, builder)) {
+                all_can_push_down = false;
+            }
         }
+        DCHECK(all_can_push_down);
+        builder->end();
         break;
     }
-    case SQLFilterOp::FILTER_LESS:
-        builder->lessThan(predicate.col_name, predicate.data_type, 
predicate.literals[0]);
+    case TExprOpcode::COMPOUND_NOT: {
+        DCHECK_EQ(expr->children().size(), 1);
+        builder->startNot();
+        auto res = _build_search_argument(expr->children()[0], builder);
+        DCHECK(res);
+        builder->end();
         break;
-    case SQLFilterOp::FILTER_LESS_OR_EQUAL:
-        builder->lessThanEquals(predicate.col_name, predicate.data_type, 
predicate.literals[0]);
+    }
+    case TExprOpcode::GE:
+        builder->startNot();
+        _build_less_than(expr, builder);
+        builder->end();
         break;
-    case SQLFilterOp::FILTER_LARGER: {
+    case TExprOpcode::GT:
         builder->startNot();
-        builder->lessThanEquals(predicate.col_name, predicate.data_type, 
predicate.literals[0]);
+        _build_less_than_equals(expr, builder);
         builder->end();
         break;
-    }
-    case SQLFilterOp::FILTER_LARGER_OR_EQUAL: {
+    case TExprOpcode::LE:
+        _build_less_than_equals(expr, builder);
+        break;
+    case TExprOpcode::LT:
+        _build_less_than(expr, builder);
+        break;
+    case TExprOpcode::EQ:
+        _build_equals(expr, builder);
+        break;
+    case TExprOpcode::NE:
         builder->startNot();
-        builder->lessThan(predicate.col_name, predicate.data_type, 
predicate.literals[0]);
+        _build_equals(expr, builder);
         builder->end();
         break;
-    }
-    default:
-        return false;
-    }
-    if (index < predicates.size() - 1) {
-        bool can_build = build_search_argument(predicates, index + 1, builder);
-        if (!can_build) {
-            return false;
-        }
+    case TExprOpcode::FILTER_IN:
+        _build_filter_in(expr, builder);
+        break;
+    case TExprOpcode::FILTER_NOT_IN:
+        builder->startNot();
+        _build_filter_in(expr, builder);
         builder->end();
+        break;
+    // is null and is not null is represented as function call
+    case TExprOpcode::INVALID_OPCODE:
+        DCHECK(expr->node_type() == TExprNodeType::FUNCTION_CALL);
+        if (expr->fn().name.function_name == "is_null_pred") {
+            _build_is_null(expr, builder);
+        } else if (expr->fn().name.function_name == "is_not_null_pred") {
+            builder->startNot();
+            _build_is_null(expr, builder);
+            builder->end();
+        } else {
+            // should not reach here, because _check_expr_can_push_down has 
already checked
+            __builtin_unreachable();
+        }
+        break;
+
+    default:
+        // should not reach here, because _check_expr_can_push_down has 
already checked
+        __builtin_unreachable();
     }
     return true;
 }
 
-bool OrcReader::_init_search_argument(
-        const std::unordered_map<std::string, ColumnValueRangeType>* 
colname_to_value_range) {
-    if ((!_enable_filter_by_min_max) || colname_to_value_range->empty()) {
-        return false;
-    }
-    std::vector<OrcPredicate> predicates;
-    auto& root_type = _reader->getType();
-    std::unordered_map<std::string, const orc::Type*> type_map;
-    for (int i = 0; i < root_type.getSubtypeCount(); ++i) {
-        type_map.emplace(get_field_name_lower_case(&root_type, i), 
root_type.getSubtype(i));
-    }
-    for (auto& col_name : _lazy_read_ctx.all_read_columns) {
-        auto iter = colname_to_value_range->find(col_name);
-        if (iter == colname_to_value_range->end()) {
-            continue;
-        }
-        auto type_it = type_map.find(_col_name_to_file_col_name[col_name]);
-        if (type_it == type_map.end()) {
-            continue;
+bool OrcReader::_init_search_argument(const VExprContextSPtrs& conjuncts) {
+    // build search argument, if any expr can not be pushed down, return false
+    auto builder = orc::SearchArgumentFactory::newBuilder();
+    bool at_least_one_can_push_down = false;
+    builder->startAnd();
+    for (const auto& expr_ctx : conjuncts) {
+        _vslot_ref_to_orc_predicate_data_type.clear();
+        _vliteral_to_orc_literal.clear();
+        if (_build_search_argument(expr_ctx->root(), builder)) {
+            at_least_one_can_push_down = true;
         }
-        std::visit(
-                [&](auto& range) {
-                    std::vector<OrcPredicate> value_predicates =
-                            value_range_to_predicate(range, type_it->second);
-                    for (auto& range_predicate : value_predicates) {
-                        predicates.emplace_back(range_predicate);
-                    }
-                },
-                iter->second);
     }
-    if (predicates.empty()) {
+    if (!at_least_one_can_push_down) {
+        // if all exprs can not be pushed down, builder->end() will throw 
exception
         return false;
     }
+    builder->end();
 
-    if (_is_hive1_orc_or_use_idx) {
-        // use hive 1.x version orc file, need to convert column name to 
internal column name
-        for (OrcPredicate& it : predicates) {
-            it.col_name = _col_name_to_file_col_name[it.col_name];
-        }
-    }
-
-    // check if all column names in predicates are same as orc file
-    DCHECK(std::all_of(predicates.begin(), predicates.end(), [&](const 
OrcPredicate& predicate) {
-        return type_map.contains(predicate.col_name);
-    }));
-
-    std::unique_ptr<orc::SearchArgumentBuilder> builder = 
orc::SearchArgumentFactory::newBuilder();
-    if (build_search_argument(predicates, 0, builder)) {
-        std::unique_ptr<orc::SearchArgument> sargs = builder->build();
-        _row_reader_options.searchArgument(std::move(sargs));
-        return true;
-    } else {
-        return false;
-    }
+    auto sargs = builder->build();
+    _profile->add_info_string("OrcReader SearchArgument: ", sargs->toString());
+    _row_reader_options.searchArgument(std::move(sargs));
+    return true;
 }
 
 Status OrcReader::set_fill_columns(
@@ -846,7 +1023,7 @@ Status OrcReader::set_fill_columns(
                     visit_slot(child.get());
                 }
             } else if (VInPredicate* in_predicate = 
typeid_cast<VInPredicate*>(filter_impl)) {
-                if (in_predicate->children().size() > 0) {
+                if (!in_predicate->children().empty()) {
                     visit_slot(in_predicate->children()[0].get());
                 }
             } else {
@@ -865,12 +1042,25 @@ Status OrcReader::set_fill_columns(
         visit_slot(conjunct->root().get());
     }
 
+    if (_is_acid) {
+        _lazy_read_ctx.predicate_orc_columns.insert(
+                _lazy_read_ctx.predicate_orc_columns.end(),
+                TransactionalHive::READ_ROW_COLUMN_NAMES.begin(),
+                TransactionalHive::READ_ROW_COLUMN_NAMES.end());
+    }
+
     for (auto& read_col : _read_cols_lower_case) {
         _lazy_read_ctx.all_read_columns.emplace_back(read_col);
         if (predicate_columns.size() > 0) {
             auto iter = predicate_columns.find(read_col);
             if (iter == predicate_columns.end()) {
-                _lazy_read_ctx.lazy_read_columns.emplace_back(read_col);
+                if (!_is_acid ||
+                    
std::find(TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
+                              
TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end(),
+                              read_col) ==
+                            
TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end()) {
+                    _lazy_read_ctx.lazy_read_columns.emplace_back(read_col);
+                }
             } else {
                 
_lazy_read_ctx.predicate_columns.first.emplace_back(iter->first);
                 
_lazy_read_ctx.predicate_columns.second.emplace_back(iter->second.second);
@@ -926,8 +1116,22 @@ Status OrcReader::set_fill_columns(
         _lazy_read_ctx.can_lazy_read = true;
     }
 
-    if (_colname_to_value_range == nullptr || 
!_init_search_argument(_colname_to_value_range)) {
+    if (_lazy_read_ctx.conjuncts.empty()) {
         _lazy_read_ctx.can_lazy_read = false;
+    } else if (_enable_filter_by_min_max) {
+        auto res = _init_search_argument(_lazy_read_ctx.conjuncts);
+        if (_state->query_options().check_orc_init_sargs_success && !res) {
+            std::stringstream ss;
+            for (const auto& conjunct : _lazy_read_ctx.conjuncts) {
+                ss << conjunct->root()->debug_string() << "\n";
+            }
+            std::string conjuncts_str = ss.str();
+            return Status::InternalError(
+                    "Session variable check_orc_init_sargs_success is set, but 
"
+                    "_init_search_argument returns false because all exprs can 
not be pushed "
+                    "down:\n " +
+                    conjuncts_str);
+        }
     }
     try {
         _row_reader_options.range(_range_start_offset, _range_size);
@@ -1069,7 +1273,8 @@ Status OrcReader::_fill_partition_columns(
         if (num_deserialized != rows) {
             return Status::InternalError(
                     "Failed to fill partition column: {}={} ."
-                    "Number of rows expected to be written : {}, number of 
rows actually written : "
+                    "Number of rows expected to be written : {}, number of 
rows actually "
+                    "written : "
                     "{}",
                     slot_desc->col_name(), value, num_deserialized, rows);
         }
@@ -1675,6 +1880,12 @@ std::string OrcReader::get_field_name_lower_case(const 
orc::Type* orc_type, int
 
 Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
     RETURN_IF_ERROR(get_next_block_impl(block, read_rows, eof));
+    if (*eof) {
+        COUNTER_UPDATE(_orc_profile.selected_row_group_count,
+                       _reader_metrics.SelectedRowGroupCount);
+        COUNTER_UPDATE(_orc_profile.evaluated_row_group_count,
+                       _reader_metrics.EvaluatedRowGroupCount);
+    }
     if (_orc_filter) {
         RETURN_IF_ERROR(_orc_filter->get_status());
     }
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h 
b/be/src/vec/exec/format/orc/vorc_reader.h
index 4ebfb68a22f..98c31645b41 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -18,9 +18,9 @@
 #pragma once
 
 #include <cctz/time_zone.h>
-#include <stddef.h>
-#include <stdint.h>
 
+#include <cstddef>
+#include <cstdint>
 #include <list>
 #include <memory>
 #include <orc/OrcFile.hh>
@@ -41,6 +41,7 @@
 #include "orc/Reader.hh"
 #include "orc/Type.hh"
 #include "orc/Vector.hh"
+#include "orc/sargs/Literal.hh"
 #include "runtime/types.h"
 #include "util/runtime_profile.h"
 #include "vec/aggregate_functions/aggregate_function.h"
@@ -51,6 +52,8 @@
 #include "vec/exec/format/format_common.h"
 #include "vec/exec/format/generic_reader.h"
 #include "vec/exec/format/table/transactional_hive_reader.h"
+#include "vec/exprs/vliteral.h"
+#include "vec/exprs/vslot_ref.h"
 
 namespace doris {
 class RuntimeState;
@@ -80,13 +83,6 @@ namespace doris::vectorized {
 
 class ORCFileInputStream;
 
-struct OrcPredicate {
-    std::string col_name;
-    orc::PredicateDataType data_type;
-    std::vector<orc::Literal> literals;
-    SQLFilterOp op;
-};
-
 struct LazyReadContext {
     VExprContextSPtrs conjuncts;
     bool can_lazy_read = false;
@@ -227,6 +223,8 @@ private:
         RuntimeProfile::Counter* decode_value_time = nullptr;
         RuntimeProfile::Counter* decode_null_map_time = nullptr;
         RuntimeProfile::Counter* filter_block_time = nullptr;
+        RuntimeProfile::Counter* selected_row_group_count = nullptr;
+        RuntimeProfile::Counter* evaluated_row_group_count = nullptr;
     };
 
     class ORCFilterImpl : public orc::ORCFilter {
@@ -287,11 +285,30 @@ private:
     void _init_orc_cols(const orc::Type& type, std::vector<std::string>& 
orc_cols,
                         std::vector<std::string>& orc_cols_lower_case,
                         std::unordered_map<std::string, const orc::Type*>& 
type_map,
-                        bool* is_hive1_orc);
+                        bool* is_hive1_orc, bool should_add_acid_prefix) const;
     static bool _check_acid_schema(const orc::Type& type);
     static const orc::Type& _remove_acid(const orc::Type& type);
-    bool _init_search_argument(
-            const std::unordered_map<std::string, ColumnValueRangeType>* 
colname_to_value_range);
+
+    // helper functions for building the search argument, up to and including _init_search_argument
+    std::tuple<bool, orc::Literal, orc::PredicateDataType> _make_orc_literal(
+            const VSlotRef* slot_ref, const VLiteral* literal);
+    bool _check_slot_can_push_down(const VExprSPtr& expr);
+    bool _check_literal_can_push_down(const VExprSPtr& expr, uint16_t 
child_id);
+    bool _check_rest_children_can_push_down(const VExprSPtr& expr);
+    bool _check_expr_can_push_down(const VExprSPtr& expr);
+    void _build_less_than(const VExprSPtr& expr,
+                          std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
+    void _build_less_than_equals(const VExprSPtr& expr,
+                                 std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
+    void _build_equals(const VExprSPtr& expr, 
std::unique_ptr<orc::SearchArgumentBuilder>& builder);
+    void _build_filter_in(const VExprSPtr& expr,
+                          std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
+    void _build_is_null(const VExprSPtr& expr,
+                        std::unique_ptr<orc::SearchArgumentBuilder>& builder);
+    bool _build_search_argument(const VExprSPtr& expr,
+                                std::unique_ptr<orc::SearchArgumentBuilder>& 
builder);
+    bool _init_search_argument(const VExprContextSPtrs& conjuncts);
+
     void _init_bloom_filter(
             std::unordered_map<std::string, ColumnValueRangeType>* 
colname_to_value_range);
     void _init_system_properties();
@@ -584,6 +601,7 @@ private:
     std::unique_ptr<ORCFileInputStream> _file_input_stream;
     Statistics _statistics;
     OrcProfile _orc_profile;
+    orc::ReaderMetrics _reader_metrics;
 
     std::unique_ptr<orc::ColumnVectorBatch> _batch;
     std::unique_ptr<orc::Reader> _reader;
@@ -629,6 +647,9 @@ private:
     std::unordered_map<std::string, std::string> _table_col_to_file_col;
     //support iceberg position delete .
     std::vector<int64_t>* _position_delete_ordered_rowids = nullptr;
+    std::unordered_map<const VSlotRef*, orc::PredicateDataType>
+            _vslot_ref_to_orc_predicate_data_type;
+    std::unordered_map<const VLiteral*, orc::Literal> _vliteral_to_orc_literal;
 
     // If you set "orc_tiny_stripe_threshold_bytes" = 0, the use tiny stripes 
merge io optimization will not be used.
     int64_t _orc_tiny_stripe_threshold_bytes = 8L * 1024L * 1024L;
diff --git a/be/test/exec/test_data/orc_scanner/orders.orc 
b/be/test/exec/test_data/orc_scanner/orders.orc
new file mode 100644
index 00000000000..6fad5043288
Binary files /dev/null and b/be/test/exec/test_data/orc_scanner/orders.orc 
differ
diff --git a/be/test/testutil/desc_tbl_builder.cpp 
b/be/test/testutil/desc_tbl_builder.cpp
index 4cba9a44a4b..6404d1c5449 100644
--- a/be/test/testutil/desc_tbl_builder.cpp
+++ b/be/test/testutil/desc_tbl_builder.cpp
@@ -17,20 +17,9 @@
 
 #include "testutil/desc_tbl_builder.h"
 
-#include <glog/logging.h>
-#include <gtest/gtest-message.h>
-#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
 
-#include <vector>
-
-#include "common/object_pool.h"
 #include "common/status.h"
-#include "gtest/gtest_pred_impl.h"
-#include "runtime/define_primitive_type.h"
-#include "runtime/descriptors.h"
-#include "util/bit_util.h"
-
-using std::vector;
 
 namespace doris {
 
@@ -44,7 +33,7 @@ TupleDescBuilder& DescriptorTblBuilder::declare_tuple() {
 
 // item_id of -1 indicates no itemTupleId
 static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const 
TypeDescriptor& type,
-                                            int slot_idx, int item_id) {
+                                            const std::string& name, int 
slot_idx, int item_id) {
     int null_byte = slot_idx / 8;
     int null_bit = slot_idx % 8;
     TSlotDescriptor slot_desc;
@@ -58,6 +47,7 @@ static TSlotDescriptor make_slot_descriptor(int id, int 
parent_id, const TypeDes
     slot_desc.__set_nullIndicatorBit(null_bit);
     slot_desc.__set_slotIdx(slot_idx);
     slot_desc.__set_isMaterialized(true);
+    slot_desc.__set_colName(name);
     // if (item_id != -1) {
     //     slot_desc.__set_itemTupleId(item_id);
     // }
@@ -78,8 +68,9 @@ DescriptorTbl* DescriptorTblBuilder::build() {
     int tuple_id = 0;
     int slot_id = 0;
 
-    for (int i = 0; i < _tuples_descs.size(); ++i) {
-        build_tuple(_tuples_descs[i]->slot_types(), &thrift_desc_tbl, 
&tuple_id, &slot_id);
+    for (auto& _tuples_desc : _tuples_descs) {
+        build_tuple(_tuples_desc->slot_types(), _tuples_desc->slot_names(), 
&thrift_desc_tbl,
+                    &tuple_id, &slot_id);
     }
 
     Status status = DescriptorTbl::create(_obj_pool, thrift_desc_tbl, 
&desc_tbl);
@@ -87,7 +78,8 @@ DescriptorTbl* DescriptorTblBuilder::build() {
     return desc_tbl;
 }
 
-TTupleDescriptor DescriptorTblBuilder::build_tuple(const 
vector<TypeDescriptor>& slot_types,
+TTupleDescriptor DescriptorTblBuilder::build_tuple(const 
std::vector<TypeDescriptor>& slot_types,
+                                                   const 
std::vector<std::string>& slot_names,
                                                    TDescriptorTable* 
thrift_desc_tbl,
                                                    int* next_tuple_id, int* 
slot_id) {
     // We never materialize struct slots (there's no in-memory representation 
of structs,
@@ -95,7 +87,8 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const 
vector<TypeDescriptor>&
     // still have a struct item type. In this case, the array item tuple 
contains the
     // "inlined" struct fields.
     if (slot_types.size() == 1 && slot_types[0].type == TYPE_STRUCT) {
-        return build_tuple(slot_types[0].children, thrift_desc_tbl, 
next_tuple_id, slot_id);
+        return build_tuple(slot_types[0].children, slot_types[0].field_names, 
thrift_desc_tbl,
+                           next_tuple_id, slot_id);
     }
 
     int tuple_id = *next_tuple_id;
@@ -111,7 +104,7 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const 
vector<TypeDescriptor>&
         // }
 
         thrift_desc_tbl->slotDescriptors.push_back(
-                make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, 
item_id));
+                make_slot_descriptor(*slot_id, tuple_id, slot_types[i], 
slot_names[i], i, item_id));
         thrift_desc_tbl->__isset.slotDescriptors = true;
         ++(*slot_id);
     }
diff --git a/be/test/testutil/desc_tbl_builder.h 
b/be/test/testutil/desc_tbl_builder.h
index c29ef9acd43..968b29bd001 100644
--- a/be/test/testutil/desc_tbl_builder.h
+++ b/be/test/testutil/desc_tbl_builder.h
@@ -20,15 +20,16 @@
 
 #include <gen_cpp/Descriptors_types.h>
 
+#include <tuple>
 #include <vector>
 
+#include "common/object_pool.h"
+#include "runtime/descriptors.h"
 #include "runtime/types.h"
 
 namespace doris {
 
-class ObjectPool;
 class TupleDescBuilder;
-class DescriptorTbl;
 
 // Aids in the construction of a DescriptorTbl by declaring tuples and slots
 // associated with those tuples.
@@ -40,6 +41,7 @@ class DescriptorTbl;
 // DescriptorTblBuilder builder;
 // builder.declare_tuple() << TYPE_TINYINT << TYPE_TIMESTAMP; // gets TupleId 0
 // builder.declare_tuple() << TYPE_FLOAT; // gets TupleId 1
+// builder.declare_tuple() << std::make_tuple(TYPE_INT, "col1") << 
std::make_tuple(TYPE_STRING, "col2"); // gets Tuple with type and name
 // DescriptorTbl desc_tbl = builder.build();
 class DescriptorTblBuilder {
 public:
@@ -57,20 +59,31 @@ private:
     std::vector<TupleDescBuilder*> _tuples_descs;
 
     TTupleDescriptor build_tuple(const std::vector<TypeDescriptor>& slot_types,
+                                 const std::vector<std::string>& slot_names,
                                  TDescriptorTable* thrift_desc_tbl, int* 
tuple_id, int* slot_id);
 };
 
 class TupleDescBuilder {
 public:
+    using SlotType = std::tuple<TypeDescriptor, std::string>;
+    TupleDescBuilder& operator<<(const SlotType& slot) {
+        _slot_types.push_back(std::get<0>(slot));
+        _slot_names.push_back(std::get<1>(slot));
+        return *this;
+    }
+
     TupleDescBuilder& operator<<(const TypeDescriptor& slot_type) {
         _slot_types.push_back(slot_type);
+        _slot_names.emplace_back("");
         return *this;
     }
 
     std::vector<TypeDescriptor> slot_types() const { return _slot_types; }
+    std::vector<std::string> slot_names() const { return _slot_names; }
 
 private:
     std::vector<TypeDescriptor> _slot_types;
+    std::vector<std::string> _slot_names;
 };
 
 } // end namespace doris
diff --git a/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp 
b/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp
index ac79f22a6bb..947fb957ee5 100644
--- a/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp
+++ b/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp
@@ -41,7 +41,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<char*>(&tiny_value), 
sizeof(tiny_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::BYTE);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_TINYINT>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_TINYINT>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getLong(), 127);
     }
@@ -52,7 +52,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<char*>(&small_value), 
sizeof(small_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::SHORT);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_SMALLINT>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_SMALLINT>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getLong(), 32000);
     }
@@ -63,7 +63,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<char*>(&int_value), 
sizeof(int_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getLong(), 2147483647);
     }
@@ -74,7 +74,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<char*>(&big_value), 
sizeof(big_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_BIGINT>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_BIGINT>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getLong(), 9223372036854775807LL);
     }
@@ -84,7 +84,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<char*>(&float_value), 
sizeof(float_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_FLOAT>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_FLOAT>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_NEAR(literal.getFloat(), 3.14159f, 0.0001);
     }
@@ -95,7 +95,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<char*>(&double_value), 
sizeof(double_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DOUBLE);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_DOUBLE>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_DOUBLE>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_DOUBLE_EQ(literal.getFloat(), 3.14159265358979323846);
     }
@@ -105,12 +105,24 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, 
ConvertTypesTest) {
         StringRef literal_data(str_value.data(), str_value.size());
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_STRING>(orc_type_ptr.get(), 
(void*)&literal_data, 0, 0);
+                convert_to_orc_literal<TYPE_STRING>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_EQ(std::string(literal.getString().data(), 
literal.getString().length()),
                   "Hello, World!");
     }
 
+    // VARCHAR test
+    {
+        std::string str_value = "VARCHAR test";
+        StringRef literal_data(str_value.data(), str_value.size());
+        auto orc_type_ptr = createPrimitiveType(orc::TypeKind::VARCHAR);
+        auto [success, literal] =
+                convert_to_orc_literal<TYPE_VARCHAR>(orc_type_ptr.get(), 
literal_data, 0, 0);
+        ASSERT_TRUE(success);
+        ASSERT_EQ(std::string(literal.getString().data(), 
literal.getString().length()),
+                  "VARCHAR test");
+    }
+
     // DECIMAL32 test
     {
         int32_t decimal32_value = 12345;
@@ -118,7 +130,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
                                sizeof(decimal32_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_DECIMAL32>(orc_type_ptr.get(), 
literal_data.data, 9, 4);
+                convert_to_orc_literal<TYPE_DECIMAL32>(orc_type_ptr.get(), 
literal_data, 9, 4);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getDecimal().toString(), "1.2345");
     }
@@ -129,8 +141,8 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<const char*>(&decimal64_value),
                                sizeof(decimal64_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL);
-        auto [success, literal] = 
convert_to_orc_literal<TYPE_DECIMAL64>(orc_type_ptr.get(),
-                                                                         
literal_data.data, 18, 6);
+        auto [success, literal] =
+                convert_to_orc_literal<TYPE_DECIMAL64>(orc_type_ptr.get(), 
literal_data, 18, 6);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getDecimal().toString(), "123456789.012345");
     }
@@ -141,8 +153,8 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<const 
char*>(&decimal128_value),
                                sizeof(decimal128_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL);
-        auto [success, literal] = convert_to_orc_literal<TYPE_DECIMAL128I>(
-                orc_type_ptr.get(), literal_data.data, 38, 9);
+        auto [success, literal] =
+                convert_to_orc_literal<TYPE_DECIMAL128I>(orc_type_ptr.get(), 
literal_data, 38, 9);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getDecimal().toString(), "1.234512345");
     }
@@ -154,7 +166,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         StringRef literal_data(reinterpret_cast<const char*>(&date_value), 
sizeof(date_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DATE);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
 
         // Verify converted day offset
@@ -165,7 +177,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         date_value.from_date_str("0001-01-01", 10);
         literal_data = StringRef(reinterpret_cast<const char*>(&date_value), 
sizeof(date_value));
         std::tie(success, literal) =
-                convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success); //-719162
         ASSERT_EQ(literal.getDate(), -719162);
 
@@ -173,7 +185,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         date_value.from_date_str("9999-12-31", 10);
         literal_data = StringRef(reinterpret_cast<const char*>(&date_value), 
sizeof(date_value));
         std::tie(success, literal) =
-                convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success); //
         ASSERT_EQ(literal.getDate(), 2932896);
     }
@@ -187,7 +199,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
                                sizeof(datetime_value));
         auto orc_type_ptr = createPrimitiveType(orc::TypeKind::TIMESTAMP);
         auto [success, literal] =
-                convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
 
         // Verify seconds and nanoseconds
@@ -198,7 +210,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) {
         literal_data =
                 StringRef(reinterpret_cast<const char*>(&datetime_value), 
sizeof(datetime_value));
         std::tie(success, literal) =
-                convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getTimestamp().getMillis(), 1710374400000); //
 
@@ -207,10 +219,307 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, 
ConvertTypesTest) {
         literal_data =
                 StringRef(reinterpret_cast<const char*>(&datetime_value), 
sizeof(datetime_value));
         std::tie(success, literal) =
-                convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data.data, 0, 0);
+                convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data, 0, 0);
         ASSERT_TRUE(success);
         ASSERT_EQ(literal.getTimestamp().getMillis(), 1709208000000); //
     }
+
+    // Type mismatch test
+    {
+        // Try to convert INT type to STRING
+        int32_t int_value = 42;
+        StringRef literal_data(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING);
+        auto [success, literal] =
+                convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+        ASSERT_FALSE(success);
+    }
+
+    // Try to convert FLOAT to DOUBLE
+    {
+        float float_value = 3.14f;
+        StringRef literal_data(reinterpret_cast<const char*>(&float_value), 
sizeof(float_value));
+        auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DOUBLE);
+        auto [success, literal] =
+                convert_to_orc_literal<TYPE_FLOAT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+        ASSERT_FALSE(success);
+    }
+
+    // Try to convert DATE to TIMESTAMP
+    {
+        VecDateTimeValue date_value;
+        date_value.from_date_str("2024-03-14", 10);
+        StringRef literal_data(reinterpret_cast<const char*>(&date_value), 
sizeof(date_value));
+        auto orc_type_ptr = createPrimitiveType(orc::TypeKind::TIMESTAMP);
+        auto [success, literal] =
+                convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data, 0, 0);
+        ASSERT_FALSE(success);
+    }
+
+    {
+        // TINYINT -> other integer types
+        int8_t tiny_value = 42;
+        StringRef literal_data(reinterpret_cast<const char*>(&tiny_value), 
sizeof(tiny_value));
+
+        // TINYINT -> SHORT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::SHORT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_TINYINT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_TRUE(success);
+        }
+
+        // TINYINT -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_TINYINT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_TRUE(success);
+        }
+
+        // TINYINT -> LONG
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_TINYINT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_TRUE(success);
+        }
+    }
+
+    // 2. Conversion between floating point and integer types
+    {
+        // FLOAT -> integer types
+        float float_value = 3.14f;
+        StringRef literal_data(reinterpret_cast<const char*>(&float_value), 
sizeof(float_value));
+
+        // FLOAT -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_FLOAT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // FLOAT -> LONG
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_FLOAT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // INT -> FLOAT
+        int32_t int_value = 42;
+        literal_data = StringRef(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+    }
+
+    // 3. Conversion between string and numeric types
+    {
+        // STRING -> numeric types
+        std::string str_value = "123";
+        StringRef literal_data(str_value.data(), str_value.size());
+
+        // STRING -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_STRING>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // STRING -> FLOAT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_STRING>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // INT -> STRING
+        int32_t int_value = 42;
+        literal_data = StringRef(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+    }
+
+    // 4. Conversion between date/time types and other types
+    {
+        // DATE -> other types
+        VecDateTimeValue date_value;
+        date_value.from_date_str("2024-03-14", 10);
+        StringRef literal_data(reinterpret_cast<const char*>(&date_value), 
sizeof(date_value));
+
+        // DATE -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // DATE -> STRING
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_DATE>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // INT -> DATE
+        int32_t int_value = 42;
+        literal_data = StringRef(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DATE);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+    }
+
+    // 5. Conversion between Decimal and other types
+    {
+        // DECIMAL -> other types
+        int128_t decimal_value = 123456789;
+        StringRef literal_data(reinterpret_cast<const char*>(&decimal_value),
+                               sizeof(decimal_value));
+
+        // DECIMAL -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] = 
convert_to_orc_literal<TYPE_DECIMAL128I>(orc_type_ptr.get(),
+                                                                               
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // DECIMAL -> FLOAT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT);
+            auto [success, literal] = 
convert_to_orc_literal<TYPE_DECIMAL128I>(orc_type_ptr.get(),
+                                                                               
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // INT -> DECIMAL
+        int32_t int_value = 42;
+        literal_data = StringRef(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 10, 2);
+            ASSERT_FALSE(success);
+        }
+    }
+
+    // 6. Conversion between BOOLEAN and other types
+    {
+        // BOOLEAN -> other types
+        uint8_t bool_value = true;
+        StringRef literal_data(reinterpret_cast<const char*>(&bool_value), 
sizeof(bool_value));
+
+        // BOOLEAN -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_BOOLEAN>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // BOOLEAN -> STRING
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_BOOLEAN>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // INT -> BOOLEAN
+        int32_t int_value = 1;
+        literal_data = StringRef(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::BOOLEAN);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+    }
+
+    // 7. Conversion between TIMESTAMP and other types
+    {
+        // TIMESTAMP -> other types
+        VecDateTimeValue datetime_value;
+        datetime_value.from_date_str("2024-03-14 15:30:45", 19);
+        StringRef literal_data(reinterpret_cast<const char*>(&datetime_value),
+                               sizeof(datetime_value));
+
+        // TIMESTAMP -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // TIMESTAMP -> STRING
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_DATETIME>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // INT -> TIMESTAMP
+        int64_t int_value = 1615737045;
+        literal_data = StringRef(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::TIMESTAMP);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_BIGINT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+    }
+
+    // 8. Conversion between VARCHAR and other types
+    {
+        // VARCHAR -> other types
+        std::string varchar_value = "test string";
+        StringRef literal_data(varchar_value.data(), varchar_value.size());
+
+        // VARCHAR -> INT
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_VARCHAR>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+
+        // VARCHAR -> DECIMAL
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_VARCHAR>(orc_type_ptr.get(), 
literal_data, 10, 2);
+            ASSERT_FALSE(success);
+        }
+
+        // INT -> VARCHAR
+        int32_t int_value = 42;
+        literal_data = StringRef(reinterpret_cast<const char*>(&int_value), 
sizeof(int_value));
+        {
+            auto orc_type_ptr = createPrimitiveType(orc::TypeKind::VARCHAR);
+            auto [success, literal] =
+                    convert_to_orc_literal<TYPE_INT>(orc_type_ptr.get(), 
literal_data, 0, 0);
+            ASSERT_FALSE(success);
+        }
+    }
 }
 } // namespace vectorized
 } // namespace doris
diff --git a/be/test/vec/exec/orc_reader_test.cpp b/be/test/vec/exec/orc_reader_test.cpp
new file mode 100644
index 00000000000..ff7452ae625
--- /dev/null
+++ b/be/test/vec/exec/orc_reader_test.cpp
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "orc/sargs/SearchArgument.hh"
+#include "runtime/define_primitive_type.h"
+#include "runtime/exec_env.h"
+#include "runtime/runtime_state.h"
+#include "testutil/desc_tbl_builder.h"
+#include "vec/exec/format/orc/orc_memory_pool.h"
+#include "vec/exec/format/orc/vorc_reader.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/exprs/vexpr_fwd.h"
+#include "vec/utils/util.hpp"
+namespace doris::vectorized {
+class OrcReaderTest : public testing::Test {
+public:
+    OrcReaderTest() = default;
+    ~OrcReaderTest() override = default;
+
+private:
+    // Sentinel returned when the expression cannot be converted into an ORC search argument.
+    static constexpr const char* CANNOT_PUSH_DOWN_ERROR = "can't push down";
+    // Sentinel returned when the reader or the expression could not be prepared. It matches no
+    // expected search argument, so a comparison against the expected text fails as well.
+    static constexpr const char* SETUP_FAILED_ERROR = "test setup failed";
+
+    // Builds the textual ORC search argument for a JSON-serialized TExpr.
+    //
+    // The helper opens the `orders.orc` fixture with an OrcReader, deserializes `expr`,
+    // prepares the resulting expression tree against the orders tuple layout, and passes
+    // its root to OrcReader::_build_search_argument().
+    //
+    // Returns the search argument rendered by toString(), CANNOT_PUSH_DOWN_ERROR when
+    // _build_search_argument() returns false, or SETUP_FAILED_ERROR when a preparation
+    // step reports a non-OK status (a gtest failure is recorded in that case too).
+    std::string build_search_argument(const std::string& expr) {
+        // build orc_reader for table orders
+        std::vector<std::string> column_names = {
+                "o_orderkey",      "o_custkey", "o_orderstatus",  "o_totalprice", "o_orderdate",
+                "o_orderpriority", "o_clerk",   "o_shippriority", "o_comment"};
+        ObjectPool object_pool;
+        DescriptorTblBuilder builder(&object_pool);
+        builder.declare_tuple() << std::make_tuple(TYPE_INT, "o_orderkey")
+                                << std::make_tuple(TYPE_INT, "o_custkey")
+                                << std::make_tuple(TYPE_STRING, "o_orderstatus")
+                                << std::make_tuple(TYPE_DOUBLE, "o_totalprice")
+                                << std::make_tuple(TYPE_DATE, "o_orderdate")
+                                << std::make_tuple(TYPE_STRING, "o_orderpriority")
+                                << std::make_tuple(TYPE_STRING, "o_clerk")
+                                << std::make_tuple(TYPE_INT, "o_shippriority")
+                                << std::make_tuple(TYPE_STRING, "o_comment");
+        DescriptorTbl* desc_tbl = builder.build();
+        auto* tuple_desc = const_cast<TupleDescriptor*>(desc_tbl->get_tuple_descriptor(0));
+        RowDescriptor row_desc(tuple_desc, false);
+
+        // The scan range covers the whole fixture file (orders.orc is 1293 bytes).
+        TFileScanRangeParams params;
+        TFileRangeDesc range;
+        range.path = "./be/test/exec/test_data/orc_scanner/orders.orc";
+        range.start_offset = 0;
+        range.size = 1293;
+        auto reader = OrcReader::create_unique(params, range, "", nullptr, true);
+        auto status = reader->init_reader(&column_names, nullptr, {}, false, tuple_desc, &row_desc,
+                                          nullptr, nullptr);
+        EXPECT_TRUE(status.ok());
+        if (!status.ok()) {
+            // Do not build a search argument on a reader that failed to initialize.
+            return SETUP_FAILED_ERROR;
+        }
+
+        // deserialize expr
+        auto exprx = apache::thrift::from_json_string<TExpr>(expr);
+        VExprContextSPtr context;
+        status = VExpr::create_expr_tree(exprx, context);
+        EXPECT_TRUE(status.ok());
+        if (!status.ok()) {
+            // `context` may not have been populated; it must not be dereferenced below.
+            return SETUP_FAILED_ERROR;
+        }
+
+        // prepare expr context
+        RuntimeState state;
+        state.set_desc_tbl(desc_tbl);
+        status = context->prepare(&state, row_desc);
+        EXPECT_TRUE(status.ok());
+        if (!status.ok()) {
+            return SETUP_FAILED_ERROR;
+        }
+
+        // build search argument
+        auto sarg_builder = orc::SearchArgumentFactory::newBuilder();
+        auto res = reader->_build_search_argument(context->root(), sarg_builder);
+        if (!res) {
+            return CANNOT_PUSH_DOWN_ERROR;
+        }
+        return sarg_builder->build()->toString();
+    }
+};
+
+TEST_F(OrcReaderTest, test_build_search_argument) {
+    ExecEnv::GetInstance()->set_orc_memory_pool(new ORCMemoryPool());
+    std::vector<std::string>
+            exprs =
+                    {
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 
2000000, 3000000);
+                            
R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not 
in (200, 300, 400));
+                            
R"|({"1":{"lst":["rec",16,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and 
o_orderkey != 1100000);
+                            
R"|({"1":{"lst":["rec",14,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":
 [...]
+                            // SELECT count(o_orderkey) FROM tpch1_orc.orders 
WHERE o_orderkey IN (1000000, 2000000, 3000000) OR (o_orderdate >= '1994-01-01' 
AND o_orderdate <= '1994-12-31');
+                            
R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"re
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 2 or (o_comment like '%delayed%' and o_orderpriority = 
'1-URGENT');
+                            
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 2 or (o_totalprice < 173665.47 and o_custkey >= 36901);
+                            
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey < 1 + 1;
+                            
R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey in (null, 25);
+                            
R"|({"1":{"lst":["rec",4,{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":3},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{
 [...]
+                            // SELECT count(o_orderkey) FROM tpch1_orc.orders 
WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT';
+                            
R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey between 1 and 100 or random() > 0.5;
+                            
R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where lower(o_orderpriority) = '1-urgent';
+                            
R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderkey * 2 < 60;
+                            
R"|({"1":{"lst":["rec",5,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{
 [...]
+                            // select count(o_orderkey) from tpch1_orc.orders 
where o_orderdate is not null;
+                            
R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}
 [...]
+                    };
+    std::vector<std::string> result_search_arguments = {
+            "leaf-0 = (o_orderkey < 100), leaf-1 = (o_orderkey <= 5999900), 
leaf-2 "
+            "= (o_orderkey "
+            "in "
+            "[1000000, 2000000, 3000000]), expr = (or leaf-0 (not leaf-1) 
leaf-2)",
+            "leaf-0 = (o_orderkey is null), leaf-1 = (o_orderkey < 100), 
leaf-2 = "
+            "(o_orderkey <= "
+            "1000), leaf-3 = (o_orderkey in [200, 300, 400]), expr = (and (or "
+            "leaf-0 (not leaf-1)) "
+            "(or leaf-0 leaf-2) (or leaf-0 (not leaf-3)))",
+            "leaf-0 = (o_orderkey is null), leaf-1 = (o_orderkey < 1000000), 
leaf-2 = (o_orderkey "
+            "<= 1200000), leaf-3 = (o_orderkey = 1100000), expr = (and (or 
leaf-0 (not leaf-1)) "
+            "(or leaf-0 leaf-2) (or leaf-0 (not leaf-3)))",
+            "leaf-0 = (o_orderkey in [1000000, 2000000, 3000000]), leaf-1 = 
(o_orderdate < "
+            "17121205), leaf-2 = (o_orderdate <= 17121205), expr = (and (or 
leaf-0 (not leaf-1)) "
+            "(or leaf-0 leaf-2))",
+            "leaf-0 = (o_orderkey < 2), leaf-1 = (o_orderpriority = 1-URGENT), 
expr = (or leaf-0 "
+            "leaf-1)",
+            "leaf-0 = (o_orderkey < 2), leaf-1 = (o_custkey < 36901), expr = 
(or leaf-0 (not "
+            "leaf-1))",
+            "leaf-0 = (o_orderkey < 2), expr = leaf-0",
+            "leaf-0 = (o_orderkey = 25), expr = leaf-0",
+            CANNOT_PUSH_DOWN_ERROR,
+            CANNOT_PUSH_DOWN_ERROR,
+            CANNOT_PUSH_DOWN_ERROR,
+            CANNOT_PUSH_DOWN_ERROR,
+            CANNOT_PUSH_DOWN_ERROR,
+    };
+    for (int i = 0; i < exprs.size(); i++) {
+        auto search_argument = build_search_argument(exprs[i]);
+        ASSERT_EQ(search_argument, result_search_arguments[i]);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql
index a946b25ff1a..6a1c9dce521 100644
--- a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql
@@ -14,3 +14,25 @@ create table type_changed_table (
 ) stored as orc;
 insert into type_changed_table values (1, 'Alice'), (2, 'Bob'), (3, 'Charlie');
 ALTER TABLE type_changed_table CHANGE COLUMN id id STRING;
+
+-- Fixture tables for the null-aware equal join exercised by the
+-- test_hive_orc_predicate suite (table_a.age <=> table_b.age).
+-- Both `age` columns contain NULL values.
+CREATE TABLE table_a (
+    id INT,
+    age INT
+) STORED AS ORC;
+
+INSERT INTO table_a VALUES
+(1, null),
+(2, 18),
+(3, null),
+(4, 25);
+
+CREATE TABLE table_b (
+    id INT,
+    age INT
+) STORED AS ORC;
+
+INSERT INTO table_b VALUES
+(1, null),
+(2, null),
+(3, 1000000),
+(4, 100);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index a2e158f64e6..4f4cea552be 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -489,6 +489,8 @@ public class SessionVariable implements Serializable, Writable {
 
     public static final String ENABLE_ORC_FILTER_BY_MIN_MAX = 
"enable_orc_filter_by_min_max";
 
+    public static final String CHECK_ORC_INIT_SARGS_SUCCESS = 
"check_orc_init_sargs_success";
+
     public static final String INLINE_CTE_REFERENCED_THRESHOLD = 
"inline_cte_referenced_threshold";
 
     public static final String ENABLE_CTE_MATERIALIZE = 
"enable_cte_materialize";
@@ -1872,6 +1874,14 @@ public class SessionVariable implements Serializable, Writable {
             needForward = true)
     public boolean enableOrcFilterByMinMax = true;
 
+    @VariableMgr.VarAttr(
+            name = CHECK_ORC_INIT_SARGS_SUCCESS,
+            description = {"是否检查orc init sargs是否成功。默认为 false。",
+                    "Whether to check whether orc init sargs is successful. "
+                            + "The default value is false."},
+            needForward = true)
+    public boolean checkOrcInitSargsSuccess = false;
+
     @VariableMgr.VarAttr(
             name = EXTERNAL_TABLE_ANALYZE_PART_NUM,
             description = {"收集外表统计信息行数时选取的采样分区数,默认 -1 表示全部分区",
@@ -3513,6 +3523,14 @@ public class SessionVariable implements Serializable, Writable {
         this.enableOrcFilterByMinMax = enableOrcFilterByMinMax;
     }
 
+    /** Returns the current value of the {@code check_orc_init_sargs_success} session variable. */
+    public boolean isCheckOrcInitSargsSuccess() {
+        return checkOrcInitSargsSuccess;
+    }
+
+    /** Sets the value of the {@code check_orc_init_sargs_success} session variable. */
+    public void setCheckOrcInitSargsSuccess(boolean checkOrcInitSargsSuccess) {
+        this.checkOrcInitSargsSuccess = checkOrcInitSargsSuccess;
+    }
+
     public String getSqlDialect() {
         return sqlDialect;
     }
@@ -4146,6 +4164,7 @@ public class SessionVariable implements Serializable, Writable {
         tResult.setEnableOrcLazyMat(enableOrcLazyMat);
         tResult.setEnableParquetFilterByMinMax(enableParquetFilterByMinMax);
         tResult.setEnableOrcFilterByMinMax(enableOrcFilterByMinMax);
+        tResult.setCheckOrcInitSargsSuccess(checkOrcInitSargsSuccess);
 
         tResult.setEnableDeleteSubPredicateV2(enableDeleteSubPredicateV2);
         tResult.setTruncateCharOrVarcharColumns(truncateCharOrVarcharColumns);
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index db3edcc9373..508d64f772a 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -362,6 +362,7 @@ struct TQueryOptions {
   150: optional bool enable_runtime_filter_partition_prune = true;
 
   163: optional bool inverted_index_compatible_read = false
+  164: optional bool check_orc_init_sargs_success = false
   // upgrade options. keep them same in every branch.
   200: optional bool new_is_ip_address_in_range = false;
 
diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc.out b/regression-test/data/external_table_p0/hive/test_hive_orc.out
index 066c5d4b4d3..03942dbe9fb 100644
Binary files a/regression-test/data/external_table_p0/hive/test_hive_orc.out and b/regression-test/data/external_table_p0/hive/test_hive_orc.out differ
diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out b/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out
index 8060ddd620c..7d21967daad 100644
Binary files a/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out and b/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out differ
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
index 0f837c0abd3..8d85feaa77a 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
@@ -81,6 +81,39 @@ suite("test_hive_orc", "all_types,p0,external,hive,external_docker,external_dock
        qt_string_col_dict_plain_mixed3 """select count(col2) from 
string_col_dict_plain_mixed_orc where col1 like '%Test%';"""
     }
 
+    // Regression queries for ORC predicate pushdown: OR-connected and mixed AND/OR
+    // predicates on tpch1_orc.orders, IN-lists containing NULL on
+    // orc_all_types.boolean_col, and an `is null` count for each listed
+    // orc_all_types column.
+    def predicate_pushdown = {
+        qt_predicate_pushdown1 """ select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey is not null and (o_orderkey < 100 or 
o_orderkey > 5999900 or o_orderkey in (1000000, 2000000, 3000000)); """
+        qt_predicate_pushdown2 """ select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 
and o_orderkey not in (200, 300, 400)); """
+        qt_predicate_pushdown3 """ select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey is not null and (o_orderkey < 100 or 
o_orderkey > 5999900 or o_orderkey = 3000000); """
+        qt_predicate_pushdown4 """ select count(o_orderkey) from 
tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 
1200000 and o_orderkey != 1100000); """
+        qt_predicate_pushdown5 """ SELECT count(o_orderkey) FROM 
tpch1_orc.orders WHERE (o_orderdate >= '1994-01-01' AND o_orderdate <= 
'1994-12-31') AND (o_orderpriority = '5-LOW' OR o_orderpriority = '3-MEDIUM') 
AND o_totalprice > 2000;"""
+        qt_predicate_pushdown6 """ SELECT count(o_orderkey) FROM 
tpch1_orc.orders WHERE o_orderstatus <> 'F' AND o_custkey < 54321; """
+        qt_predicate_pushdown7 """ SELECT count(o_orderkey) FROM 
tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = 
'1-URGENT'; """
+        qt_predicate_pushdown8 """ SELECT count(o_orderkey) FROM 
tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR o_clerk = 
'Clerk#000000470'; """
+
+        qt_predicate_pushdown_in1 """ select count(*)  from orc_all_types 
where boolean_col in (null); """
+        qt_predicate_pushdown_in2 """ select count(*)  from orc_all_types 
where boolean_col in (null, 0); """
+        qt_predicate_pushdown_in3 """ select count(*)  from orc_all_types 
where boolean_col in (null, 1); """
+
+        // Runs a qt_ check, named after the column, that counts the rows of
+        // orc_all_types where that column is NULL.
+        def test_col_is_null = { String col ->
+            "qt_orc_all_types_${col}_is_null" """ select count(*)  from 
orc_all_types where ${col} is null; """
+        }
+        test_col_is_null("tinyint_col")
+        test_col_is_null("smallint_col")
+        test_col_is_null("int_col")
+        test_col_is_null("bigint_col")
+        test_col_is_null("boolean_col")
+        test_col_is_null("float_col")
+        test_col_is_null("double_col")
+        test_col_is_null("string_col")
+        test_col_is_null("binary_col")
+        test_col_is_null("timestamp_col")
+        test_col_is_null("decimal_col")
+        test_col_is_null("char_col")
+        test_col_is_null("varchar_col")
+        test_col_is_null("date_col")
+    }
+
     String enabled = context.config.otherConfigs.get("enableHiveTest")
     if (enabled == null || !enabled.equalsIgnoreCase("true")) {
         logger.info("diable Hive test.")
@@ -108,6 +141,7 @@ suite("test_hive_orc", "all_types,p0,external,hive,external_docker,external_dock
             only_partition_col()
             decimals()
             string_col_dict_plain_mixed()
+            predicate_pushdown()
 
             sql """drop catalog if exists ${catalog_name}"""
 
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy
index 899c2d6ad40..3f45e6ff439 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy
@@ -43,6 +43,17 @@ suite("test_hive_orc_predicate", "p0,external,hive,external_docker,external_dock
             qt_predicate_changed_type2 """ select * from type_changed_table 
where id = '2';"""
             qt_predicate_changed_type3 """ select * from type_changed_table 
where id = '3';"""
 
+            qt_predicate_null_aware_equal_in_rt """select * from table_a inner 
join table_b on table_a.age <=> table_b.age and table_b.id in (1,3) order by 
table_a.id;"""
+
+            // use check_orc_init_sargs_success to test full acid push down
+            sql """use `${catalog_name}`.`default`"""
+            if (hivePrefix == "hive3") {
+                sql """ set check_orc_init_sargs_success = true; """
+                qt_predicate_full_acid_push_down """ select * from 
orc_full_acid_par where value = 'BB' order by id;"""
+                sql """ set check_orc_init_sargs_success = false; """
+            }
+
+            sql """use `${catalog_name}`.`multi_catalog`"""
             qt_lazy_materialization_for_list_type """ select l from 
complex_data_orc where id > 2 order by id; """
             qt_lazy_materialization_for_map_type """ select m from 
complex_data_orc where id > 2 order by id; """
             qt_lazy_materialization_for_list_and_map_type """ select * from 
complex_data_orc where id > 2 order by id; """
@@ -50,6 +61,7 @@ suite("test_hive_orc_predicate", "p0,external,hive,external_docker,external_dock
 
             sql """drop catalog if exists ${catalog_name}"""
         } finally {
+            sql """ set check_orc_init_sargs_success = false; """
         }
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to