github-actions[bot] commented on code in PR #17444:
URL: https://github.com/apache/doris/pull/17444#discussion_r1128030512


##########
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:
##########
@@ -599,6 +804,318 @@ Status RowGroupReader::_filter_block_internal(Block* 
block,
     return Status::OK();
 }
 
+Status RowGroupReader::_rewrite_dict_predicates() {
+    for (vector<std::string>::iterator it = _dict_filter_col_names.begin();
+         it != _dict_filter_col_names.end();) {
+        std::string& dict_filter_col_name = *it;
+        int slot_id = (*_colname_to_slot_id)[dict_filter_col_name];
+        Block temp_block;
+        MutableColumnPtr dict_value_column = ColumnString::create();
+        bool has_dict = false;
+        
RETURN_IF_ERROR(_column_readers[dict_filter_col_name]->get_dict_values(dict_value_column,
+                                                                               
&has_dict));
+        size_t dict_value_column_size = dict_value_column->size();
+        DCHECK(has_dict);
+//        fprintf(stderr, "dict_value_column_size %ld\n", 
dict_value_column_size);
+
+        int dict_pos = -1;
+        int i = 0;
+        for (const auto slot_desc : _tuple_descriptor->slots()) {
+            if (!slot_desc->need_materialize()) {
+                // should be ignore from reading
+                continue;
+            }
+            if (slot_desc->id() == slot_id) {
+                temp_block.insert(
+                        {ColumnNullable::create(std::move(dict_value_column),
+                                                
ColumnUInt8::create(dict_value_column_size, 0)),
+                         
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), ""});
+                dict_pos = i;
+
+            } else {
+                
temp_block.insert(ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(),
+                                                         
slot_desc->get_data_type_ptr(),
+                                                         
slot_desc->col_name()));
+            }
+            ++i;
+        }
+
+        std::vector<VExprContext*>* ctxs = nullptr;
+        auto iter = _slot_id_to_filter_conjuncts->find(slot_id);
+        if (iter != _slot_id_to_filter_conjuncts->end()) {
+            ctxs = &(iter->second);
+        } else {
+        }
+//        for (auto& ctx : (*ctxs)) {
+//            VExpr* root = ctx->root();
+//            _set_column_id(root);
+//        }
+
+        std::vector<uint32_t> columns_to_filter(1, dict_pos);
+        int column_to_keep = temp_block.columns();
+//        columns_to_filter.resize(column_to_keep);
+//        for (uint32_t i = 0; i < column_to_keep; ++i) {
+//            columns_to_filter[i] = i;
+//        }
+        if (dict_pos != 0) {
+            
temp_block.get_by_position(0).column->assume_mutable()->resize(dict_value_column_size);
+        }
+        RETURN_IF_ERROR(_execute_conjuncts_and_filter_block(*ctxs, 
&temp_block, columns_to_filter, column_to_keep));
+        if (dict_pos != 0) {
+            temp_block.get_by_position(0).column->assume_mutable()->clear();
+        }
+//        for (auto& ctx : (*ctxs)) {
+//            VExpr* root = ctx->root();
+//            _restore_column_id(root);
+//        }
+
+        ColumnPtr &dict_column = temp_block.get_by_position(dict_pos).column;
+        // dict column is empty after conjunct eval, file group can be skipped
+        if (dict_column->size() == 0) {
+            _is_group_filtered = true;
+            return Status::OK();
+        }
+
+        // not supported now.
+        if (dict_column->size() > 1) {
+            for (auto& ctx : (*ctxs)) {
+                _filter_conjuncts.push_back(ctx);
+            }
+            it = _dict_filter_col_names.erase(it);
+            continue;
+        }
+
+        // get dict codes
+        std::vector<int32_t> dict_codes;
+        const ColumnNullable* nullable_column =
+                static_cast<const ColumnNullable*>(dict_column.get());
+        const ColumnString* nested_column =
+                static_cast<const 
ColumnString*>(nullable_column->get_nested_column_ptr().get());
+        RETURN_IF_ERROR(
+                
_column_readers[dict_filter_col_name]->get_dict_codes(nested_column, 
&dict_codes));
+
+        // now only support one dict_code
+//        DCHECK_EQ(dict_codes.size(), 1);
+//        fprintf(stderr, "dict_codes.size(): %ld\n", dict_codes.size());
+        if (dict_codes.size() == 1) {
+            VExpr* root;
+            {
+                TFunction fn;
+                TFunctionName fn_name;
+                fn_name.__set_db_name("");
+                fn_name.__set_function_name("eq");
+                fn.__set_name(fn_name);
+                fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+                std::vector<TTypeDesc> arg_types;
+                arg_types.push_back(create_type_desc(PrimitiveType::TYPE_INT));
+                arg_types.push_back(create_type_desc(PrimitiveType::TYPE_INT));
+                fn.__set_arg_types(arg_types);
+                
fn.__set_ret_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+                fn.__set_has_var_args(false);
+
+                TExprNode texpr_node;
+                
texpr_node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+                texpr_node.__set_node_type(TExprNodeType::BINARY_PRED);
+                texpr_node.__set_opcode(TExprOpcode::EQ);
+                texpr_node.__set_vector_opcode(TExprOpcode::EQ);
+                texpr_node.__set_fn(fn);
+                texpr_node.__set_is_nullable(true);
+                texpr_node.__set_child_type(TPrimitiveType::INT);
+                texpr_node.__set_num_children(2);
+                root = _obj_pool->add(new VectorizedFnCall(texpr_node));
+            }
+            {
+                SlotDescriptor* slot = nullptr;
+                const std::vector<SlotDescriptor*>& slots = 
_tuple_descriptor->slots();
+                for (auto each : slots) {
+                    if (each->id() == slot_id) {
+                        slot = each;
+                        break;
+                    }
+                }
+                VExpr* slot_ref_expr = _obj_pool->add(new VSlotRef(slot));
+                root->add_child(slot_ref_expr);
+            }
+
+            {
+                TExprNode texpr_node;
+                texpr_node.__set_node_type(TExprNodeType::INT_LITERAL);
+                texpr_node.__set_type(create_type_desc(TYPE_INT));
+                //                for (int i = 0; i < dict_codes.size(); ++i) {
+                TIntLiteral int_literal;
+//                fprintf(stderr, "dict_codes[0]: %d\n", dict_codes[0]);
+                int_literal.__set_value(dict_codes[0]);
+                texpr_node.__set_int_literal(int_literal);
+                VExpr* literal_expr = _obj_pool->add(new VLiteral(texpr_node));
+                root->add_child(literal_expr);
+                //                }
+            }
+            VExprContext* rewritten_conjunct_ctx = _obj_pool->add(new 
VExprContext(root));
+            RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(
+                    _state, 
RowDescriptor(const_cast<TupleDescriptor*>(_tuple_descriptor), true)));
+            RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state));
+            _dict_filter_conjunct.push_back(rewritten_conjunct_ctx);
+            _filter_conjuncts.push_back(rewritten_conjunct_ctx);
+            ++it;
+        } else {
+//                        VExpr* root;
+//                        {
+//                            TFunction fn;
+//                            TFunctionName fn_name;
+//                            fn_name.__set_db_name("");
+//                            fn_name.__set_function_name("in");
+//                            fn.__set_name(fn_name);
+//                            
fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+//                            
fn.__set_ret_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+//                            fn.__set_has_var_args(false);
+//
+//                            TExprNode texpr_node;
+//                            
texpr_node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+//                            
texpr_node.__set_node_type(TExprNodeType::IN_PRED);
+//                            texpr_node.__set_opcode(TExprOpcode::FILTER_IN);
+//                            
texpr_node.__set_vector_opcode(TExprOpcode::FILTER_IN);
+//                            texpr_node.__set_fn(fn);
+//                            texpr_node.__set_is_nullable(true);
+//                            texpr_node.__set_child_type(TPrimitiveType::INT);
+//                            texpr_node.__set_num_children(1 + 
dict_codes.size());
+//                            root = _obj_pool->add(new 
VInPredicate(texpr_node));
+//                        }
+//                        {
+//                            SlotDescriptor* slot = nullptr;
+//                            const std::vector<SlotDescriptor*>& slots = 
_tuple_descriptor->slots();
+//                            for (auto each : slots) {
+//                                if (each->id() == slot_id) {
+//                                    slot = each;
+//                                    break;
+//                                }
+//                            }
+//                            VExpr* slot_ref_expr = new VSlotRef(slot);
+//                            root->add_child(slot_ref_expr);
+//                        }
+//
+//                        {
+//                            for (int j = 0; j < dict_codes.size(); ++j) {
+//                                TExprNode texpr_node;
+//                                
texpr_node.__set_node_type(TExprNodeType::INT_LITERAL);
+//                                
texpr_node.__set_type(create_type_desc(TYPE_INT));
+//                                TIntLiteral int_literal;
+//                                //                fprintf(stderr, 
"dict_codes[0]: %d\n", dict_codes[0]);
+//                                int_literal.__set_value(dict_codes[j]);
+//                                texpr_node.__set_int_literal(int_literal);
+//                                VExpr* literal_expr = _obj_pool->add(new 
VLiteral(texpr_node));
+//                                root->add_child(literal_expr);
+//                            }
+//                        }
+//                        VExprContext* rewritten_conjunct_ctx = 
_obj_pool->add(new VExprContext(root));
+//                        RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(
+//                                _state, 
RowDescriptor(const_cast<TupleDescriptor*>(_tuple_descriptor), true)));
+//                        
RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state));
+//                        
_dict_filter_conjunct.push_back(rewritten_conjunct_ctx);
+//                        _filter_conjuncts.push_back(rewritten_conjunct_ctx);
+//                        ++it;
+            VDirectInPredicate* root;
+            {
+                TTypeDesc type_desc = 
create_type_desc(PrimitiveType::TYPE_BOOLEAN);
+                type_desc.__set_is_nullable(false);
+                TExprNode node;
+                node.__set_type(type_desc);
+                node.__set_node_type(TExprNodeType::IN_PRED);
+                node.in_predicate.__set_is_not_in(false);
+                node.__set_opcode(TExprOpcode::FILTER_IN);
+                node.__isset.vector_opcode = true;
+                node.__set_vector_opcode(TExprOpcode::FILTER_IN);
+                node.__set_is_nullable(false);
+
+                root = _obj_pool->add(new 
vectorized::VDirectInPredicate(node));
+                std::shared_ptr<HybridSetBase> 
hybrid_set(create_set(PrimitiveType::TYPE_INT));
+                for (int j = 0; j < dict_codes.size(); ++j) {
+                    hybrid_set->insert(&dict_codes[j]);
+                }
+                root->set_filter(hybrid_set);
+            }
+
+            {
+                SlotDescriptor* slot = nullptr;
+                const std::vector<SlotDescriptor*>& slots = 
_tuple_descriptor->slots();
+                for (auto each : slots) {
+                    if (each->id() == slot_id) {
+                        slot = each;
+                        break;
+                    }
+                }
+                VExpr* slot_ref_expr = _obj_pool->add(new VSlotRef(slot));
+                root->add_child(slot_ref_expr);
+            }
+
+            VExprContext* rewritten_conjunct_ctx = _obj_pool->add(new 
VExprContext(root));
+            RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(
+                    _state, 
RowDescriptor(const_cast<TupleDescriptor*>(_tuple_descriptor), true)));
+            RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state));
+            _dict_filter_conjunct.push_back(rewritten_conjunct_ctx);
+            _filter_conjuncts.push_back(rewritten_conjunct_ctx);
+            ++it;
+        }
+    }
+    return Status::OK();
+}
+
+//void RowGroupReader::_convert_dict_cols_to_string_cols(Block* block) {
+//    for (auto& _dict_filter_col_name : _dict_filter_col_names) {
+//        ColumnPtr& column = block->get_by_name(_dict_filter_col_name).column;
+//        if (auto* nullable_column = 
check_and_get_column<ColumnNullable>(*column)) {
+//            MutableColumnPtr nested_column =
+//                    
nullable_column->get_nested_column_ptr()->assume_mutable();
+//            auto* dict_column = 
typeid_cast<ColumnDictI32*>(nested_column.get());
+//            DCHECK(dict_column);
+//
+//
+//            MutableColumnPtr string_column =
+//                    dict_column->convert_to_string_column_if_dictionary();
+//
+//            size_t pos = block->get_position_by_name(_dict_filter_col_name);
+//
+//            block->get_by_position(pos).type = 
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+//            block->replace_by_position(
+//                    pos,
+//                    ColumnNullable::create(std::move(string_column),
+//                                           
nullable_column->get_null_map_column_ptr()));
+//        }
+//    }
+//}
+
+void RowGroupReader::_convert_dict_cols_to_string_cols(Block* block) {
+    for (auto& dict_filter_col_name : _dict_filter_col_names) {
+        ColumnPtr& column = block->get_by_name(dict_filter_col_name).column;
+        if (auto* nullable_column = 
check_and_get_column<ColumnNullable>(*column)) {
+            const ColumnPtr& nested_column = 
nullable_column->get_nested_column_ptr();
+            const ColumnDictI32* dict_column = assert_cast<const 
ColumnDictI32*>(nested_column.get());
+            DCHECK(dict_column);
+
+            MutableColumnPtr string_column = 
_column_readers[dict_filter_col_name]->convert_dict_column_to_string_column(dict_column);
+
+            size_t pos = block->get_position_by_name(dict_filter_col_name);
+
+            block->get_by_position(pos).type =
+                    
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+            block->replace_by_position(
+                    pos, ColumnNullable::create(std::move(string_column),
+                                                
nullable_column->get_null_map_column_ptr()));
+        } else if (auto* const_column = 
check_and_get_column<ColumnConst>(*column)) {
+            !const_column->get_bool(0);

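Review Comment:
   warning: expression result unused [clang-diagnostic-unused-value]
   ```cpp
               !const_column->get_bool(0);
               ^
   ```
   The negated value of `const_column->get_bool(0)` is computed and immediately discarded, so the `!` accomplishes nothing here. Either use the result (for example, assign it to a variable or test it in a condition) or remove the statement.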
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to