github-actions[bot] commented on code in PR #43255:
URL: https://github.com/apache/doris/pull/43255#discussion_r1831486112
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -461,8 +464,10 @@ static std::unordered_map<orc::TypeKind,
orc::PredicateDataType> TYPEKIND_TO_PRE
{orc::TypeKind::BOOLEAN, orc::PredicateDataType::BOOLEAN}};
template <PrimitiveType primitive_type>
-std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type, const void* value,
- int precision, int scale) {
+std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type,
Review Comment:
warning: function 'convert_to_orc_literal' exceeds recommended
size/complexity thresholds [readability-function-size]
```cpp
std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type,
^
```
<details>
<summary>Additional context</summary>
**be/src/vec/exec/format/orc/vorc_reader.cpp:466:** 94 lines including
whitespace and comments (threshold 80)
```cpp
std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type,
^
```
</details>
##########
be/test/testutil/desc_tbl_builder.h:
##########
@@ -20,15 +20,16 @@
#include <gen_cpp/Descriptors_types.h>
Review Comment:
warning: 'gen_cpp/Descriptors_types.h' file not found
[clang-diagnostic-error]
```cpp
#include <gen_cpp/Descriptors_types.h>
^
```
##########
be/test/testutil/desc_tbl_builder.cpp:
##########
@@ -17,20 +17,9 @@
#include "testutil/desc_tbl_builder.h"
Review Comment:
warning: 'testutil/desc_tbl_builder.h' file not found
[clang-diagnostic-error]
```cpp
#include "testutil/desc_tbl_builder.h"
^
```
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -558,190 +562,311 @@
}
}
-template <PrimitiveType primitive_type>
-std::vector<OrcPredicate> value_range_to_predicate(
- const ColumnValueRange<primitive_type>& col_val_range, const
orc::Type* type,
- std::vector<orc::TypeKind>* unsupported_pushdown_types) {
- std::vector<OrcPredicate> predicates;
-
- PrimitiveType src_type = OrcReader::convert_to_doris_type(type).type;
- if (src_type != primitive_type) {
- if (!(is_string_type(src_type) && is_string_type(primitive_type))) {
- // not support schema change
- return predicates;
- }
- }
-
- if (unsupported_pushdown_types != nullptr) {
- for (vector<orc::TypeKind>::iterator it =
unsupported_pushdown_types->begin();
- it != unsupported_pushdown_types->end(); ++it) {
- if (*it == type->getKind()) {
- // Unsupported type
- return predicates;
- }
- }
+std::tuple<bool, orc::Literal, orc::PredicateDataType> OrcReader::_make_orc_leteral(
+ const VSlotRef* slot_ref, const VLiteral* literal) {
+ auto literal_data = literal->get_column_ptr()->get_data_at(0);
+ auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()];
+ auto slot_type = slot->type();
+ const auto* orc_type =
_type_map[_col_name_to_file_col_name[slot->col_name()]];
+ const auto predicate_type =
TYPEKIND_TO_PREDICATE_TYPE[orc_type->getKind()];
+ switch (slot_type.type) {
+#define M(NAME) \
+ case TYPE_##NAME: { \
+ auto [valid, orc_literal] = convert_to_orc_literal<TYPE_##NAME>( \
+ orc_type, literal_data, slot_type.precision, slot_type.scale); \
+ return std::make_tuple(valid, orc_literal, predicate_type); \
+ }
+#define APPLY_FOR_PRIMITIVE_TYPE(M) \
+ M(TINYINT) \
+ M(SMALLINT) \
+ M(INT) \
+ M(BIGINT) \
+ M(LARGEINT) \
+ M(CHAR) \
+ M(DATE) \
+ M(DATETIME) \
+ M(DATEV2) \
+ M(DATETIMEV2) \
+ M(VARCHAR) \
+ M(STRING) \
+ M(HLL) \
+ M(DECIMAL32) \
+ M(DECIMAL64) \
+ M(DECIMAL128I) \
+ M(DECIMAL256) \
+ M(DECIMALV2) \
+ M(BOOLEAN) \
+ M(IPV4) \
+ M(IPV6)
+ APPLY_FOR_PRIMITIVE_TYPE(M)
+#undef M
+ default: {
+ VLOG_CRITICAL << "Unsupported Convert Orc Literal [ColName=" << slot->col_name() << "]";
+ return std::make_tuple(false, orc::Literal(false), predicate_type);
}
-
- orc::PredicateDataType predicate_data_type;
- auto type_it = TYPEKIND_TO_PREDICATE_TYPE.find(type->getKind());
- if (type_it == TYPEKIND_TO_PREDICATE_TYPE.end()) {
- // Unsupported type
- return predicates;
- } else {
- predicate_data_type = type_it->second;
}
+}
- if (col_val_range.is_fixed_value_range()) {
- OrcPredicate in_predicate;
- in_predicate.col_name = col_val_range.column_name();
- in_predicate.data_type = predicate_data_type;
- in_predicate.op = SQLFilterOp::FILTER_IN;
- for (const auto& value : col_val_range.get_fixed_value_set()) {
- auto [valid, literal] = convert_to_orc_literal<primitive_type>(
- type, &value, col_val_range.precision(),
col_val_range.scale());
- if (valid) {
- in_predicate.literals.push_back(literal);
+// check if the expr can be pushed down to orc reader
+static bool check_expr_can_push_down(const VExprSPtr& expr) {
+ DCHECK_NOTNULL(expr);
+ switch (expr->op()) {
+ case TExprOpcode::COMPOUND_AND:
+ case TExprOpcode::COMPOUND_OR:
+ case TExprOpcode::COMPOUND_NOT:
+ // at least one child can be pushed down
+ return std::ranges::any_of(expr->children(), check_expr_can_push_down);
+ case TExprOpcode::GE:
+ case TExprOpcode::GT:
+ case TExprOpcode::LE:
+ case TExprOpcode::LT:
+ case TExprOpcode::EQ:
+ case TExprOpcode::NE:
+ case TExprOpcode::FILTER_IN:
+ case TExprOpcode::FILTER_NOT_IN:
+ return true;
+ case TExprOpcode::INVALID_OPCODE:
+ if (expr->node_type() == TExprNodeType::FUNCTION_CALL) {
+ auto fn_name = expr->fn().name.function_name;
+ // only support is_null_pred and is_not_null_pred
+ if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") {
+ return true;
}
+ LOG(WARNING) << "Unsupported function [funciton=" << fn_name << "]";
}
- if (!in_predicate.literals.empty()) {
- predicates.emplace_back(in_predicate);
- }
- return predicates;
+ return false;
+ default:
+ VLOG_CRITICAL << "Unsupported Opcode [OpCode=" << expr->op() << "]";
+ return false;
}
+}
- const auto& high_value = col_val_range.get_range_max_value();
- const auto& low_value = col_val_range.get_range_min_value();
- const auto& high_op = col_val_range.get_range_high_op();
- const auto& low_op = col_val_range.get_range_low_op();
+// convert expr to sargs recursively
+bool OrcReader::_build_search_argument(const VExprSPtr& expr,
Review Comment:
warning: function '_build_search_argument' exceeds recommended
size/complexity thresholds [readability-function-size]
```cpp
bool OrcReader::_build_search_argument(const VExprSPtr& expr,
^
```
<details>
<summary>Additional context</summary>
**be/src/vec/exec/format/orc/vorc_reader.cpp:644:** 195 lines including
whitespace and comments (threshold 80)
```cpp
bool OrcReader::_build_search_argument(const VExprSPtr& expr,
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]