This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 5cb03e96010 [fix](inverted index) disable range query in
StringTypeInvertedIndexReader (#38218) (#38722)
5cb03e96010 is described below
commit 5cb03e9601025bf92ce1e4df6144530e20ced7c9
Author: Sun Chenyang <[email protected]>
AuthorDate: Thu Aug 1 23:19:44 2024 +0800
[fix](inverted index) disable range query in StringTypeInvertedIndexReader
(#38218) (#38722)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 13 +++++-
.../test_ignore_above_in_index.out | 3 ++
.../test_ignore_above_in_index.groovy | 48 ++++++++++++++++++++++
3 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 6ecce454394..c79a8d33bc2 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -813,7 +813,8 @@ bool
SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
if (_opts.runtime_state &&
!_opts.runtime_state->query_options().enable_inverted_index_query) {
return false;
}
- if (_inverted_index_iterators[pred->column_id()] == nullptr) {
+ auto pred_column_id = pred->column_id();
+ if (_inverted_index_iterators[pred_column_id] == nullptr) {
//this column without inverted index
return false;
}
@@ -828,13 +829,21 @@ bool
SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
return false;
}
+ // UNTOKENIZED strings that exceed ignore_above are written as null, which
causes range query errors
+ if (PredicateTypeTraits::is_range(pred->type()) &&
+ _inverted_index_iterators[pred_column_id] != nullptr &&
+
_inverted_index_iterators[pred_column_id]->get_inverted_index_reader_type() ==
+ InvertedIndexReaderType::STRING_TYPE) {
+ return false;
+ }
+
// Function filter no apply inverted index
if (dynamic_cast<LikeColumnPredicate<TYPE_CHAR>*>(pred) != nullptr ||
dynamic_cast<LikeColumnPredicate<TYPE_STRING>*>(pred) != nullptr) {
return false;
}
- bool handle_by_fulltext = _column_has_fulltext_index(pred->column_id());
+ bool handle_by_fulltext = _column_has_fulltext_index(pred_column_id);
if (handle_by_fulltext) {
// when predicate in compound condition which except leafNode of
andNode,
// only can apply match query for fulltext index,
diff --git
a/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out
b/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out
index f88a155567e..718bd29e5d9 100644
--- a/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out
+++ b/regression-test/data/inverted_index_p0/test_ignore_above_in_index.out
@@ -2,3 +2,6 @@
-- !sql --
3
+-- !sql --
+772
+
diff --git
a/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy
b/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy
index de508d9d263..c6b33c7baee 100644
--- a/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy
+++ b/regression-test/suites/inverted_index_p0/test_ignore_above_in_index.groovy
@@ -39,4 +39,52 @@ suite("test_ignore_above_in_index", "p0") {
sql "insert into ${tableName} values (20, '1234567890');"
sql "insert into ${tableName} values (20, '1234567890');"
qt_sql "select count() from ${tableName} where c = '1234567890';"
+
+ def tableName2 = "test_ignore_above_in_index2"
+
+ sql "DROP TABLE IF EXISTS ${tableName2}"
+ sql """
+ CREATE TABLE ${tableName2} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` string NULL COMMENT "",
+ `request` string NULL COMMENT "",
+ `status` int NULL COMMENT "",
+ `size` int NULL COMMENT "",
+ INDEX clientip_idx (`clientip`) USING INVERTED
PROPERTIES("ignore_above"="5") COMMENT '',
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"unicode", "support_phrase" = "true") COMMENT '',
+ INDEX status_idx (`status`) USING INVERTED COMMENT '',
+ INDEX size_idx (`size`) USING INVERTED COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // load the json data
+ streamLoad {
+ table "${tableName2}"
+
+ set 'read_json_by_line', 'true'
+ set 'format', 'json'
+ file 'documents-1000.json' // import json file
+ time 10000 // limit inflight 10s
+
+ // If a check callback is declared, the default check conditions are ignored,
+ // so you must check every condition yourself.
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows +
json.NumberUnselectedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+
+ qt_sql "select count() from ${tableName2} where clientip > '17.0';"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]