This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a17f3e295ae [fix](inverted index) Fix errors caused by 
enable_need_read_data_opt (#42064)
a17f3e295ae is described below

commit a17f3e295aef67ffa3444ecc1570e04d6f8ad0d4
Author: Sun Chenyang <[email protected]>
AuthorDate: Tue Oct 22 11:28:07 2024 +0800

    [fix](inverted index) Fix errors caused by enable_need_read_data_opt 
(#42064)
    
    ## Proposed changes
    
    The current backend implementation does not correctly handle counts
    when an `is null` predicate exists, so for now we forbid the storage
    layer's count.
    
    For example:
    ```
    select count(b) from test where b is null
    ```
    
    When the `is null` filter is applied, the result array is filled with the
    default value of the column type, and the count operator cannot detect
    whether the result is null, so the count operator computes the wrong result.
---
 .../rules/implementation/AggregateStrategies.java  |  22 ++-
 .../data/inverted_index_p0/test_index_rqg_bug8.out |   4 +
 .../inverted_index_p0/test_index_rqg_bug8.groovy   | 149 +++++++++++++++++++++
 3 files changed, 173 insertions(+), 2 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 0f6230a5fb4..aeff011fe07 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -130,7 +130,8 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                     Set<Slot> aggSlots = funcs.stream()
                             .flatMap(f -> f.getInputSlots().stream())
                             .collect(Collectors.toSet());
-                    return conjuncts.stream().allMatch(expr -> 
checkSlotInOrExpression(expr, aggSlots));
+                    return conjuncts.stream().allMatch(expr -> 
checkSlotInOrExpression(expr, aggSlots)
+                                                                && 
checkIsNullExpr(expr, aggSlots));
                 })
                 .thenApply(ctx -> {
                     LogicalAggregate<LogicalFilter<LogicalOlapScan>> agg = 
ctx.root;
@@ -163,7 +164,8 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                     Set<Slot> aggSlots = funcs.stream()
                             .flatMap(f -> f.getInputSlots().stream())
                             .collect(Collectors.toSet());
-                    return conjuncts.stream().allMatch(expr -> 
checkSlotInOrExpression(expr, aggSlots));
+                    return conjuncts.stream().allMatch(expr -> 
checkSlotInOrExpression(expr, aggSlots)
+                                                                && 
checkIsNullExpr(expr, aggSlots));
                 })
                 .thenApply(ctx -> {
                     
LogicalAggregate<LogicalProject<LogicalFilter<LogicalOlapScan>>> agg = ctx.root;
@@ -492,6 +494,22 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
         return true;
     }
 
+    private boolean checkIsNullExpr(Expression expr, Set<Slot> aggSlots) {
+        if (expr instanceof IsNull) {
+            Set<Slot> slots = expr.getInputSlots();
+            if (slots.stream().anyMatch(aggSlots::contains)) {
+                return false;
+            }
+        } else {
+            for (Expression child : expr.children()) {
+                if (!checkIsNullExpr(child, aggSlots)) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
     private boolean isDupOrMowKeyTable(LogicalOlapScan logicalScan) {
         if (logicalScan != null) {
             KeysType keysType = logicalScan.getTable().getKeysType();
diff --git a/regression-test/data/inverted_index_p0/test_index_rqg_bug8.out 
b/regression-test/data/inverted_index_p0/test_index_rqg_bug8.out
new file mode 100644
index 00000000000..a21f3b8748e
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug8.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+0
+
diff --git 
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug8.groovy 
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug8.groovy
new file mode 100644
index 00000000000..3e54a10b326
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug8.groovy
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug8", "test_index_rqg_bug8"){
+    def table1 = "test_index_rqg_bug8"
+
+    sql "drop table if exists ${table1}"
+
+    sql """
+            CREATE TABLE ${table1} (
+        `pk` int NULL,
+        `col_int_undef_signed_index_inverted` int NULL,
+        `col_boolean_undef_signed` boolean NULL,
+        `col_boolean_undef_signed_not_null` boolean NOT NULL,
+        `col_tinyint_undef_signed` tinyint NULL,
+        `col_tinyint_undef_signed_index_inverted` tinyint NULL,
+        `col_tinyint_undef_signed_not_null` tinyint NOT NULL,
+        `col_tinyint_undef_signed_not_null_index_inverted` tinyint NOT NULL,
+        `col_smallint_undef_signed` smallint NULL,
+        `col_smallint_undef_signed_index_inverted` smallint NULL,
+        `col_smallint_undef_signed_not_null` smallint NOT NULL,
+        `col_smallint_undef_signed_not_null_index_inverted` smallint NOT NULL,
+        `col_int_undef_signed` int NULL,
+        `col_int_undef_signed_not_null` int NOT NULL,
+        `col_int_undef_signed_not_null_index_inverted` int NOT NULL,
+        `col_bigint_undef_signed` bigint NULL,
+        `col_bigint_undef_signed_index_inverted` bigint NULL,
+        `col_bigint_undef_signed_not_null` bigint NOT NULL,
+        `col_bigint_undef_signed_not_null_index_inverted` bigint NOT NULL,
+        `col_decimal_16__8__undef_signed` decimal(16,8) NULL,
+        `col_decimal_16__8__undef_signed_index_inverted` decimal(16,8) NULL,
+        `col_decimal_16__8__undef_signed_not_null` decimal(16,8) NOT NULL,
+        `col_decimal_16__8__undef_signed_not_null_index_inverted` 
decimal(16,8) NOT NULL,
+        `col_decimal_38__9__undef_signed` decimal(38,9) NULL,
+        `col_decimal_38__9__undef_signed_index_inverted` decimal(38,9) NULL,
+        `col_decimal_38__9__undef_signed_not_null` decimal(38,9) NOT NULL,
+        `col_decimal_38__9__undef_signed_not_null_index_inverted` 
decimal(38,9) NOT NULL,
+        `col_decimal_38__30__undef_signed` decimal(38,30) NULL,
+        `col_decimal_38__30__undef_signed_index_inverted` decimal(38,30) NULL,
+        `col_decimal_38__30__undef_signed_not_null` decimal(38,30) NOT NULL,
+        `col_decimal_38__30__undef_signed_not_null_index_inverted` 
decimal(38,30) NOT NULL,
+        `col_date_undef_signed` date NULL,
+        `col_date_undef_signed_index_inverted` date NULL,
+        `col_date_undef_signed_not_null` date NOT NULL,
+        `col_date_undef_signed_not_null_index_inverted` date NOT NULL,
+        `col_datetime_undef_signed` datetime NULL,
+        `col_datetime_undef_signed_index_inverted` datetime NULL,
+        `col_datetime_undef_signed_not_null` datetime NOT NULL,
+        `col_datetime_undef_signed_not_null_index_inverted` datetime NOT NULL,
+        `col_datetime_3__undef_signed` datetime(3) NULL,
+        `col_datetime_3__undef_signed_index_inverted` datetime(3) NULL,
+        `col_datetime_3__undef_signed_not_null` datetime(3) NOT NULL,
+        `col_datetime_3__undef_signed_not_null_index_inverted` datetime(3) NOT 
NULL,
+        `col_datetime_6__undef_signed` datetime(6) NULL,
+        `col_datetime_6__undef_signed_index_inverted` datetime(6) NULL,
+        `col_datetime_6__undef_signed_not_null` datetime(6) NOT NULL,
+        `col_datetime_6__undef_signed_not_null_index_inverted` datetime(6) NOT 
NULL,
+        `col_char_255__undef_signed` character(255) NULL,
+        `col_char_255__undef_signed_index_inverted` character(255) NULL,
+        `col_char_255__undef_signed_index_inverted_p_e` character(255) NULL,
+        `col_char_255__undef_signed_index_inverted_p_u` character(255) NULL,
+        `col_char_255__undef_signed_not_null` character(255) NOT NULL,
+        `col_char_255__undef_signed_not_null_index_inverted` character(255) 
NOT NULL,
+        `col_char_255__undef_signed_not_null_index_inverted_p_e` 
character(255) NOT NULL,
+        `col_char_255__undef_signed_not_null_index_inverted_p_u` 
character(255) NOT NULL,
+        `col_varchar_1024__undef_signed` varchar(1024) NULL,
+        `col_varchar_1024__undef_signed_index_inverted` varchar(1024) NULL,
+        `col_varchar_1024__undef_signed_index_inverted_p_e` varchar(1024) NULL,
+        `col_varchar_1024__undef_signed_index_inverted_p_u` varchar(1024) NULL,
+        `col_varchar_1024__undef_signed_not_null` varchar(1024) NOT NULL,
+        `col_varchar_1024__undef_signed_not_null_index_inverted` varchar(1024) 
NOT NULL,
+        `col_varchar_1024__undef_signed_not_null_index_inverted_p_e` 
varchar(1024) NOT NULL,
+        `col_varchar_1024__undef_signed_not_null_index_inverted_p_u` 
varchar(1024) NOT NULL,
+        INDEX col_tinyint_undef_signed_index_inverted_idx 
(`col_tinyint_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_tinyint_undef_signed_not_null_index_inverted_idx 
(`col_tinyint_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_smallint_undef_signed_index_inverted_idx 
(`col_smallint_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_smallint_undef_signed_not_null_index_inverted_idx 
(`col_smallint_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_int_undef_signed_index_inverted_idx 
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_int_undef_signed_not_null_index_inverted_idx 
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_bigint_undef_signed_index_inverted_idx 
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_bigint_undef_signed_not_null_index_inverted_idx 
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_decimal_16__8__undef_signed_index_inverted_idx 
(`col_decimal_16__8__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_decimal_16__8__undef_signed_not_null_index_inverted_idx 
(`col_decimal_16__8__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_decimal_38__9__undef_signed_index_inverted_idx 
(`col_decimal_38__9__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_decimal_38__9__undef_signed_not_null_index_inverted_idx 
(`col_decimal_38__9__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_decimal_38__30__undef_signed_index_inverted_idx 
(`col_decimal_38__30__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_decimal_38__30__undef_signed_not_null_index_inverted_idx 
(`col_decimal_38__30__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_date_undef_signed_index_inverted_idx 
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_date_undef_signed_not_null_index_inverted_idx 
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_datetime_undef_signed_index_inverted_idx 
(`col_datetime_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_datetime_undef_signed_not_null_index_inverted_idx 
(`col_datetime_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_datetime_3__undef_signed_index_inverted_idx 
(`col_datetime_3__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_datetime_3__undef_signed_not_null_index_inverted_idx 
(`col_datetime_3__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_datetime_6__undef_signed_index_inverted_idx 
(`col_datetime_6__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_datetime_6__undef_signed_not_null_index_inverted_idx 
(`col_datetime_6__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_char_255__undef_signed_index_inverted_idx 
(`col_char_255__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_char_255__undef_signed_index_inverted_p_e_idx 
(`col_char_255__undef_signed_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english", "lower_case" = "true", "support_phrase" = 
"true"),
+        INDEX col_char_255__undef_signed_index_inverted_p_u_idx 
(`col_char_255__undef_signed_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode", "lower_case" = "true", "support_phrase" = 
"true"),
+        INDEX col_char_255__undef_signed_not_null_index_inverted_idx 
(`col_char_255__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_char_255__undef_signed_not_null_index_inverted_p_e_idx 
(`col_char_255__undef_signed_not_null_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english", "lower_case" = "true", "support_phrase" = 
"true"),
+        INDEX col_char_255__undef_signed_not_null_index_inverted_p_u_idx 
(`col_char_255__undef_signed_not_null_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode", "lower_case" = "true", "support_phrase" = 
"true"),
+        INDEX col_varchar_1024__undef_signed_index_inverted_idx 
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_varchar_1024__undef_signed_index_inverted_p_e_idx 
(`col_varchar_1024__undef_signed_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english", "lower_case" = "true", "support_phrase" = 
"true"),
+        INDEX col_varchar_1024__undef_signed_index_inverted_p_u_idx 
(`col_varchar_1024__undef_signed_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode", "lower_case" = "true", "support_phrase" = 
"true"),
+        INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_varchar_1024__undef_signed_not_null_index_inverted_p_e_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted_p_e`) USING INVERTED 
PROPERTIES("parser" = "english", "lower_case" = "true", "support_phrase" = 
"true"),
+        INDEX col_varchar_1024__undef_signed_not_null_index_inverted_p_u_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted_p_u`) USING INVERTED 
PROPERTIES("parser" = "unicode", "lower_case" = "true", "support_phrase" = 
"true")
+        ) ENGINE=OLAP
+        UNIQUE KEY(`pk`, `col_int_undef_signed_index_inverted`)
+        DISTRIBUTED BY HASH(`pk`) BUCKETS 10
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "min_load_replica_num" = "-1",
+        "bloom_filter_columns" = "col_varchar_1024__undef_signed, 
col_date_undef_signed_not_null, col_date_undef_signed, col_int_undef_signed, 
col_varchar_1024__undef_signed_not_null, col_int_undef_signed_not_null",
+        "is_being_synced" = "false",
+        "storage_medium" = "hdd",
+        "storage_format" = "V2",
+        "inverted_index_storage_format" = "V1",
+        "enable_unique_key_merge_on_write" = "true",
+        "light_schema_change" = "true",
+        "disable_auto_compaction" = "false",
+        "enable_single_replica_compaction" = "false",
+        "group_commit_interval_ms" = "10000",
+        "group_commit_data_bytes" = "134217728",
+        "enable_mow_light_delete" = "false"
+        );
+    """
+
+    sql """ insert into ${table1} 
(pk,col_boolean_undef_signed,col_boolean_undef_signed_not_null,col_tinyint_undef_signed,col_tinyint_undef_signed_index_inverted,col_tinyint_undef_signed_not_null,col_tinyint_undef_signed_not_null_index_inverted,col_smallint_undef_signed,col_smallint_undef_signed_index_inverted,col_smallint_undef_signed_not_null,col_smallint_undef_signed_not_null_index_inverted,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_
 [...]
+
+    qt_sql """
+        SELECT count(col_decimal_16__8__undef_signed_index_inverted) FROM 
${table1} where col_decimal_16__8__undef_signed_index_inverted is null;
+    """
+    
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to