github-actions[bot] commented on code in PR #17444:
URL: https://github.com/apache/doris/pull/17444#discussion_r1128030512
##########
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:
##########
@@ -599,6 +804,318 @@ Status RowGroupReader::_filter_block_internal(Block* block,
return Status::OK();
}
+Status RowGroupReader::_rewrite_dict_predicates() {
+ for (vector<std::string>::iterator it = _dict_filter_col_names.begin();
+ it != _dict_filter_col_names.end();) {
+ std::string& dict_filter_col_name = *it;
+ int slot_id = (*_colname_to_slot_id)[dict_filter_col_name];
+ Block temp_block;
+ MutableColumnPtr dict_value_column = ColumnString::create();
+ bool has_dict = false;
+ RETURN_IF_ERROR(_column_readers[dict_filter_col_name]->get_dict_values(dict_value_column,
+ &has_dict));
+ size_t dict_value_column_size = dict_value_column->size();
+ DCHECK(has_dict);
+// fprintf(stderr, "dict_value_column_size %ld\n", dict_value_column_size);
+
+ int dict_pos = -1;
+ int i = 0;
+ for (const auto slot_desc : _tuple_descriptor->slots()) {
+ if (!slot_desc->need_materialize()) {
+ // should be ignore from reading
+ continue;
+ }
+ if (slot_desc->id() == slot_id) {
+ temp_block.insert(
+ {ColumnNullable::create(std::move(dict_value_column),
+ ColumnUInt8::create(dict_value_column_size, 0)),
+ std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), ""});
+ dict_pos = i;
+
+ } else {
+ temp_block.insert(ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(),
+ slot_desc->get_data_type_ptr(),
+ slot_desc->col_name()));
+ }
+ ++i;
+ }
+
+ std::vector<VExprContext*>* ctxs = nullptr;
+ auto iter = _slot_id_to_filter_conjuncts->find(slot_id);
+ if (iter != _slot_id_to_filter_conjuncts->end()) {
+ ctxs = &(iter->second);
+ } else {
+ }
+// for (auto& ctx : (*ctxs)) {
+// VExpr* root = ctx->root();
+// _set_column_id(root);
+// }
+
+ std::vector<uint32_t> columns_to_filter(1, dict_pos);
+ int column_to_keep = temp_block.columns();
+// columns_to_filter.resize(column_to_keep);
+// for (uint32_t i = 0; i < column_to_keep; ++i) {
+// columns_to_filter[i] = i;
+// }
+ if (dict_pos != 0) {
+ temp_block.get_by_position(0).column->assume_mutable()->resize(dict_value_column_size);
+ }
+ RETURN_IF_ERROR(_execute_conjuncts_and_filter_block(*ctxs, &temp_block, columns_to_filter, column_to_keep));
+ if (dict_pos != 0) {
+ temp_block.get_by_position(0).column->assume_mutable()->clear();
+ }
+// for (auto& ctx : (*ctxs)) {
+// VExpr* root = ctx->root();
+// _restore_column_id(root);
+// }
+
+ ColumnPtr &dict_column = temp_block.get_by_position(dict_pos).column;
+ // dict column is empty after conjunct eval, file group can be skipped
+ if (dict_column->size() == 0) {
+ _is_group_filtered = true;
+ return Status::OK();
+ }
+
+ // not supported now.
+ if (dict_column->size() > 1) {
+ for (auto& ctx : (*ctxs)) {
+ _filter_conjuncts.push_back(ctx);
+ }
+ it = _dict_filter_col_names.erase(it);
+ continue;
+ }
+
+ // get dict codes
+ std::vector<int32_t> dict_codes;
+ const ColumnNullable* nullable_column =
+ static_cast<const ColumnNullable*>(dict_column.get());
+ const ColumnString* nested_column =
+ static_cast<const ColumnString*>(nullable_column->get_nested_column_ptr().get());
+ RETURN_IF_ERROR(
+ _column_readers[dict_filter_col_name]->get_dict_codes(nested_column, &dict_codes));
+
+ // now only support one dict_code
+// DCHECK_EQ(dict_codes.size(), 1);
+// fprintf(stderr, "dict_codes.size(): %ld\n", dict_codes.size());
+ if (dict_codes.size() == 1) {
+ VExpr* root;
+ {
+ TFunction fn;
+ TFunctionName fn_name;
+ fn_name.__set_db_name("");
+ fn_name.__set_function_name("eq");
+ fn.__set_name(fn_name);
+ fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+ std::vector<TTypeDesc> arg_types;
+ arg_types.push_back(create_type_desc(PrimitiveType::TYPE_INT));
+ arg_types.push_back(create_type_desc(PrimitiveType::TYPE_INT));
+ fn.__set_arg_types(arg_types);
+ fn.__set_ret_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+ fn.__set_has_var_args(false);
+
+ TExprNode texpr_node;
+ texpr_node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+ texpr_node.__set_node_type(TExprNodeType::BINARY_PRED);
+ texpr_node.__set_opcode(TExprOpcode::EQ);
+ texpr_node.__set_vector_opcode(TExprOpcode::EQ);
+ texpr_node.__set_fn(fn);
+ texpr_node.__set_is_nullable(true);
+ texpr_node.__set_child_type(TPrimitiveType::INT);
+ texpr_node.__set_num_children(2);
+ root = _obj_pool->add(new VectorizedFnCall(texpr_node));
+ }
+ {
+ SlotDescriptor* slot = nullptr;
+ const std::vector<SlotDescriptor*>& slots = _tuple_descriptor->slots();
+ for (auto each : slots) {
+ if (each->id() == slot_id) {
+ slot = each;
+ break;
+ }
+ }
+ VExpr* slot_ref_expr = _obj_pool->add(new VSlotRef(slot));
+ root->add_child(slot_ref_expr);
+ }
+
+ {
+ TExprNode texpr_node;
+ texpr_node.__set_node_type(TExprNodeType::INT_LITERAL);
+ texpr_node.__set_type(create_type_desc(TYPE_INT));
+ // for (int i = 0; i < dict_codes.size(); ++i) {
+ TIntLiteral int_literal;
+// fprintf(stderr, "dict_codes[0]: %d\n", dict_codes[0]);
+ int_literal.__set_value(dict_codes[0]);
+ texpr_node.__set_int_literal(int_literal);
+ VExpr* literal_expr = _obj_pool->add(new VLiteral(texpr_node));
+ root->add_child(literal_expr);
+ // }
+ }
+ VExprContext* rewritten_conjunct_ctx = _obj_pool->add(new VExprContext(root));
+ RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(
+ _state, RowDescriptor(const_cast<TupleDescriptor*>(_tuple_descriptor), true)));
+ RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state));
+ _dict_filter_conjunct.push_back(rewritten_conjunct_ctx);
+ _filter_conjuncts.push_back(rewritten_conjunct_ctx);
+ ++it;
+ } else {
+// VExpr* root;
+// {
+// TFunction fn;
+// TFunctionName fn_name;
+// fn_name.__set_db_name("");
+// fn_name.__set_function_name("in");
+// fn.__set_name(fn_name);
+// fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+// fn.__set_ret_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+// fn.__set_has_var_args(false);
+//
+// TExprNode texpr_node;
+// texpr_node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+// texpr_node.__set_node_type(TExprNodeType::IN_PRED);
+// texpr_node.__set_opcode(TExprOpcode::FILTER_IN);
+// texpr_node.__set_vector_opcode(TExprOpcode::FILTER_IN);
+// texpr_node.__set_fn(fn);
+// texpr_node.__set_is_nullable(true);
+// texpr_node.__set_child_type(TPrimitiveType::INT);
+// texpr_node.__set_num_children(1 + dict_codes.size());
+// root = _obj_pool->add(new VInPredicate(texpr_node));
+// }
+// {
+// SlotDescriptor* slot = nullptr;
+// const std::vector<SlotDescriptor*>& slots = _tuple_descriptor->slots();
+// for (auto each : slots) {
+// if (each->id() == slot_id) {
+// slot = each;
+// break;
+// }
+// }
+// VExpr* slot_ref_expr = new VSlotRef(slot);
+// root->add_child(slot_ref_expr);
+// }
+//
+// {
+// for (int j = 0; j < dict_codes.size(); ++j) {
+// TExprNode texpr_node;
+// texpr_node.__set_node_type(TExprNodeType::INT_LITERAL);
+// texpr_node.__set_type(create_type_desc(TYPE_INT));
+// TIntLiteral int_literal;
+// // fprintf(stderr, "dict_codes[0]: %d\n", dict_codes[0]);
+// int_literal.__set_value(dict_codes[j]);
+// texpr_node.__set_int_literal(int_literal);
+// VExpr* literal_expr = _obj_pool->add(new VLiteral(texpr_node));
+// root->add_child(literal_expr);
+// }
+// }
+// VExprContext* rewritten_conjunct_ctx = _obj_pool->add(new VExprContext(root));
+// RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(
+// _state, RowDescriptor(const_cast<TupleDescriptor*>(_tuple_descriptor), true)));
+// RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state));
+// _dict_filter_conjunct.push_back(rewritten_conjunct_ctx);
+// _filter_conjuncts.push_back(rewritten_conjunct_ctx);
+// ++it;
+ VDirectInPredicate* root;
+ {
+ TTypeDesc type_desc = create_type_desc(PrimitiveType::TYPE_BOOLEAN);
+ type_desc.__set_is_nullable(false);
+ TExprNode node;
+ node.__set_type(type_desc);
+ node.__set_node_type(TExprNodeType::IN_PRED);
+ node.in_predicate.__set_is_not_in(false);
+ node.__set_opcode(TExprOpcode::FILTER_IN);
+ node.__isset.vector_opcode = true;
+ node.__set_vector_opcode(TExprOpcode::FILTER_IN);
+ node.__set_is_nullable(false);
+
+ root = _obj_pool->add(new vectorized::VDirectInPredicate(node));
+ std::shared_ptr<HybridSetBase> hybrid_set(create_set(PrimitiveType::TYPE_INT));
+ for (int j = 0; j < dict_codes.size(); ++j) {
+ hybrid_set->insert(&dict_codes[j]);
+ }
+ root->set_filter(hybrid_set);
+ }
+
+ {
+ SlotDescriptor* slot = nullptr;
+ const std::vector<SlotDescriptor*>& slots = _tuple_descriptor->slots();
+ for (auto each : slots) {
+ if (each->id() == slot_id) {
+ slot = each;
+ break;
+ }
+ }
+ VExpr* slot_ref_expr = _obj_pool->add(new VSlotRef(slot));
+ root->add_child(slot_ref_expr);
+ }
+
+ VExprContext* rewritten_conjunct_ctx = _obj_pool->add(new VExprContext(root));
+ RETURN_IF_ERROR(rewritten_conjunct_ctx->prepare(
+ _state, RowDescriptor(const_cast<TupleDescriptor*>(_tuple_descriptor), true)));
+ RETURN_IF_ERROR(rewritten_conjunct_ctx->open(_state));
+ _dict_filter_conjunct.push_back(rewritten_conjunct_ctx);
+ _filter_conjuncts.push_back(rewritten_conjunct_ctx);
+ ++it;
+ }
+ }
+ return Status::OK();
+}
+
+//void RowGroupReader::_convert_dict_cols_to_string_cols(Block* block) {
+// for (auto& _dict_filter_col_name : _dict_filter_col_names) {
+// ColumnPtr& column = block->get_by_name(_dict_filter_col_name).column;
+// if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+// MutableColumnPtr nested_column =
+// nullable_column->get_nested_column_ptr()->assume_mutable();
+// auto* dict_column = typeid_cast<ColumnDictI32*>(nested_column.get());
+// DCHECK(dict_column);
+//
+//
+// MutableColumnPtr string_column =
+// dict_column->convert_to_string_column_if_dictionary();
+//
+// size_t pos = block->get_position_by_name(_dict_filter_col_name);
+//
+// block->get_by_position(pos).type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+// block->replace_by_position(
+// pos,
+// ColumnNullable::create(std::move(string_column),
+// nullable_column->get_null_map_column_ptr()));
+// }
+// }
+//}
+
+void RowGroupReader::_convert_dict_cols_to_string_cols(Block* block) {
+ for (auto& dict_filter_col_name : _dict_filter_col_names) {
+ ColumnPtr& column = block->get_by_name(dict_filter_col_name).column;
+ if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+ const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr();
+ const ColumnDictI32* dict_column = assert_cast<const ColumnDictI32*>(nested_column.get());
+ DCHECK(dict_column);
+
+ MutableColumnPtr string_column = _column_readers[dict_filter_col_name]->convert_dict_column_to_string_column(dict_column);
+
+ size_t pos = block->get_position_by_name(dict_filter_col_name);
+
+ block->get_by_position(pos).type =
+ std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+ block->replace_by_position(
+ pos, ColumnNullable::create(std::move(string_column),
+ nullable_column->get_null_map_column_ptr()));
+ } else if (auto* const_column = check_and_get_column<ColumnConst>(*column)) {
+ !const_column->get_bool(0);
Review Comment:
warning: expression result unused [clang-diagnostic-unused-value]
```cpp
!const_column->get_bool(0);
^
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]