This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 8edb0189692 [Fix](inverted index) fix comparison for string after
refactor compound #40338 (#40585)
8edb0189692 is described below
commit 8edb01896920ab218306af6766fff8fa67275ab0
Author: airborne12 <[email protected]>
AuthorDate: Tue Sep 10 14:27:28 2024 +0800
[Fix](inverted index) fix comparison for string after refactor compound
#40338 (#40585)
cherry pick from #40338
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 25 +++++-----------------
be/src/olap/rowset/segment_v2/segment_iterator.h | 3 +--
be/src/vec/functions/functions_comparison.h | 23 ++++++++++++--------
3 files changed, 20 insertions(+), 31 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 288a5df19ff..fdb0b929306 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -751,18 +751,7 @@ Status SegmentIterator::_extract_common_expr_columns(const
vectorized::VExprSPtr
return Status::OK();
}
-bool SegmentIterator::_check_apply_by_inverted_index(ColumnId col_id) {
- if (_opts.runtime_state &&
!_opts.runtime_state->query_options().enable_inverted_index_query) {
- return false;
- }
- if (_inverted_index_iterators[col_id] == nullptr) {
- //this column without inverted index
- return false;
- }
- return true;
-}
-
-bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred,
bool pred_in_compound) {
+bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) {
if (_opts.runtime_state &&
!_opts.runtime_state->query_options().enable_inverted_index_query) {
return false;
}
@@ -798,15 +787,11 @@ bool
SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
bool handle_by_fulltext = _column_has_fulltext_index(pred_column_id);
if (handle_by_fulltext) {
- // when predicate in compound condition which except leafNode of
andNode,
- // only can apply match query for fulltext index,
// when predicate is leafNode of andNode,
- // can apply 'match qeury' and 'equal query' and 'list query' for
fulltext index.
- return (pred_in_compound ? pred->type() == PredicateType::MATCH
- : (pred->type() == PredicateType::MATCH ||
- pred->type() == PredicateType::IS_NULL ||
- pred->type() == PredicateType::IS_NOT_NULL
||
-
PredicateTypeTraits::is_equal_or_list(pred->type())));
+ // can apply 'match query' and 'equal query' and 'list query' for
fulltext index.
+ return pred->type() == PredicateType::MATCH || pred->type() ==
PredicateType::IS_NULL ||
+ pred->type() == PredicateType::IS_NOT_NULL ||
+ PredicateTypeTraits::is_equal_or_list(pred->type());
}
return true;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 6ffd1666f60..f5c133485aa 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -298,8 +298,7 @@ private:
void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate*
predicate);
- bool _check_apply_by_inverted_index(ColumnId col_id);
- bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool
pred_in_compound = false);
+ bool _check_apply_by_inverted_index(ColumnPredicate* pred);
void _output_index_result_column_for_expr(uint16_t* sel_rowid_idx,
uint16_t select_size,
vectorized::Block* block);
diff --git a/be/src/vec/functions/functions_comparison.h
b/be/src/vec/functions/functions_comparison.h
index f6aa46fe1c4..bb1666ab864 100644
--- a/be/src/vec/functions/functions_comparison.h
+++ b/be/src/vec/functions/functions_comparison.h
@@ -546,14 +546,6 @@ public:
//NOT support comparison predicate when parser is FULLTEXT for
expr inverted index evaluate.
return Status::OK();
}
- std::string column_name = data_type_with_name.first;
- Field param_value;
- arguments[0].column->get(0, param_value);
- auto param_type =
arguments[0].type->get_type_as_type_descriptor().type;
-
- std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory>
query_param = nullptr;
-
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
- param_type, &param_value, query_param));
segment_v2::InvertedIndexQueryType query_type;
std::string_view name_view(name);
if (name_view == NameEquals::name || name_view == NameNotEquals::name)
{
@@ -570,6 +562,19 @@ public:
return Status::InvalidArgument("invalid comparison op type {}",
Name::name);
}
+ if (segment_v2::is_range_query(query_type) &&
+ iter->get_inverted_index_reader_type() ==
+ segment_v2::InvertedIndexReaderType::STRING_TYPE) {
+ // untokenized strings exceed ignore_above, they are written as
null, causing range query errors
+ return Status::OK();
+ }
+ std::string column_name = data_type_with_name.first;
+ Field param_value;
+ arguments[0].column->get(0, param_value);
+ auto param_type =
arguments[0].type->get_type_as_type_descriptor().type;
+ std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory>
query_param = nullptr;
+
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
+ param_type, &param_value, query_param));
std::shared_ptr<roaring::Roaring> roaring =
std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
param_type, &param_value, query_param));
@@ -585,7 +590,7 @@ public:
bitmap_result = result;
bitmap_result.mask_out_null();
- if (name == "ne") {
+ if (name_view == NameNotEquals::name) {
roaring::Roaring full_result;
full_result.addRange(0, num_rows);
bitmap_result.op_not(&full_result);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]