This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 28f7ed3193f [Fix](inverted index) fix wrong opt for pk no need read data (#36618)
28f7ed3193f is described below

commit 28f7ed3193fe1d25c29ca2e2940398cc8c656df0
Author: airborne12 <[email protected]>
AuthorDate: Fri Jun 21 15:16:01 2024 +0800

    [Fix](inverted index) fix wrong opt for pk no need read data (#36618)
    
    The optimization to avoid reading data for primary keys (PK) in
    segment_iterator can lead to incorrect results when there are function
    expressions involving the same PK column in the predicate.
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  3 +
 .../test_pk_no_need_read_data.out                  | 13 +++++
 .../test_pk_no_need_read_data.groovy               | 66 ++++++++++++++++++++++
 3 files changed, 82 insertions(+)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f0c3f8f4920..2eee1e7e620 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2851,6 +2851,9 @@ bool SegmentIterator::_no_need_read_key_data(ColumnId cid, vectorized::MutableCo
     if (cids.contains(cid)) {
         return false;
     }
+    if (_column_pred_in_remaining_vconjunct.contains(_opts.tablet_schema->column(cid).name())) {
+        return false;
+    }
 
     if (column->is_nullable()) {
         auto* nullable_col_ptr = reinterpret_cast<vectorized::ColumnNullable*>(column.get());
diff --git a/regression-test/data/inverted_index_p0/test_pk_no_need_read_data.out b/regression-test/data/inverted_index_p0/test_pk_no_need_read_data.out
new file mode 100644
index 00000000000..b38181b1845
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_pk_no_need_read_data.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_0 --
+1
+
+-- !select_1 --
+1
+
+-- !select_2 --
+1
+
+-- !select_3 --
+1
+
diff --git a/regression-test/suites/inverted_index_p0/test_pk_no_need_read_data.groovy b/regression-test/suites/inverted_index_p0/test_pk_no_need_read_data.groovy
new file mode 100644
index 00000000000..4aa969debda
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_pk_no_need_read_data.groovy
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_pk_no_need_read_data", "p0"){
+    def table1 = "test_pk_no_need_read_data"
+
+    sql "drop table if exists ${table1}"
+
+    sql """
+       CREATE TABLE IF NOT EXISTS `${table1}` (
+      `date` date NULL COMMENT "",
+      `city` varchar(20) NULL COMMENT "",
+      `addr` varchar(20) NULL COMMENT "",
+      `name` varchar(20) NULL COMMENT "",
+      `compy` varchar(20) NULL COMMENT "",
+      `n` int NULL COMMENT "",
+      INDEX idx_city(city) USING INVERTED,
+      INDEX idx_addr(addr) USING INVERTED PROPERTIES("parser"="english"),
+      INDEX idx_n(n) USING INVERTED
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`date`)
+    COMMENT "OLAP"
+    DISTRIBUTED BY HASH(`date`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "in_memory" = "false",
+    "storage_format" = "V2"
+    )
+    """
+
+    sql """insert into ${table1} values
+            ('2017-10-01',null,'addr qie3','yy','lj',100),
+            ('2018-10-01',null,'hehe',null,'lala',200),
+            ('2019-10-01','beijing','addr xuanwu','wugui',null,300),
+            ('2020-10-01','beijing','addr fengtai','fengtai1','fengtai2',null),
+            ('2021-10-01','beijing','addr chaoyang','wangjing','donghuqu',500),
+            ('2022-10-01','shanghai','hehe',null,'haha',null),
+            ('2023-10-01','tengxun','qie','addr gg','lj',null),
+            ('2024-10-01','tengxun2','qie',null,'lj',800)
+    """
+
+    // case1: count-on-index pushdown enabled; count rows via an equality predicate and via a function predicate (year) on key column `date`
+    sql "set enable_count_on_index_pushdown = true"
+
+    qt_select_0 "SELECT COUNT() FROM ${table1} WHERE date='2017-10-01'"
+    qt_select_1 "SELECT COUNT() FROM ${table1} WHERE year(date)='2017'"
+
+    // case2: count-on-index pushdown disabled; rerun the same two queries on key column `date`
+    sql "set enable_count_on_index_pushdown = false"
+
+    qt_select_2 "SELECT COUNT() FROM ${table1} WHERE date='2017-10-01'"
+    qt_select_3 "SELECT COUNT() FROM ${table1} WHERE year(date)='2017'"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to