This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f27070c2d13 [fix](load) fix load plan when using WHERE clause with CompoundPredicate (#57128)
f27070c2d13 is described below

commit f27070c2d132b8995a2056d953476ad9c18b662c
Author: Xin Liao <[email protected]>
AuthorDate: Mon Oct 20 20:47:40 2025 +0800

    [fix](load) fix load plan when using WHERE clause with CompoundPredicate (#57128)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Refactored the nullability clearing logic in
    `NereidsLoadPlanInfoCollector.java` to recursively clear nullable
    information from an expression and all its children. Previously only
    the root expression's nullable info was cleared, so the children of a
    `CompoundPredicate` in a load's WHERE clause kept stale nullability and
    the BE hit a bad cast while executing the filter. Clearing the whole
    expression tree lets each expression recompute its own nullability,
    ensuring correct handling of complex filter expressions.
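
    For example, a load whose WHERE clause combines an AND with a nested
    OR, like the one exercised by the new regression test (the label and
    S3 path below are illustrative placeholders), triggered the crash:

    ```sql
    LOAD LABEL example_label (
        DATA INFILE("s3://bucket/broker_load_with_where.csv")
        INTO TABLE tbl_s3_broker_load_with_where
        COLUMNS TERMINATED BY ","
        FORMAT AS "CSV"
        WHERE k1 IN (11001, 11002)
          AND (k3 IN (1) OR k4 IN (1, 2))
    )
    WITH S3 (...);
    ```

    The resulting BE stack trace: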
    
    ```
    F 2025-10-11 16:43:33,404 88673 status.h:466] Bad cast from type:doris::vectorized::ColumnNullable* to doris::vectorized::ColumnVector<(doris::PrimitiveType)2>*
    *** Check failure stack trace: ***
        @     0x5564186efacf  google::LogMessage::SendToLog()
        @     0x5564186e60e0  google::LogMessage::Flush()
        @     0x5564186e97d9  google::LogMessageFatal::~LogMessageFatal()
        @     0x5563fff5673c  doris::Status::FatalError<>()
        @     0x55640b78d501  _ZZ11assert_castIPN5doris10vectorized12ColumnVectorILNS0_13PrimitiveTypeE2EEEL18TypeCheckOnRelease1EPNS1_7IColumnEET_OT1_ENKUlOS9_E_clIS8_EES5_SC_
        @     0x55640b78cd38  assert_cast<>()
        @     0x556411b8a62a  _ZZN5doris10vectorized13VCompoundPred7executeEPNS0_12VExprContextEPNS0_5BlockEPiENKUlTnbvE_clILb0EEEDav
        @     0x556411b8204d  doris::vectorized::VCompoundPred::execute()
        @     0x556411c79d23  doris::vectorized::VExprContext::execute()
        @     0x556411c7fe9c  doris::vectorized::VExprContext::execute_conjuncts()
        @     0x556411c7ed6b  doris::vectorized::VExprContext::execute_conjuncts_and_filter_block()
        @     0x556411c7ea58  doris::vectorized::VExprContext::filter_block()
        @     0x5564119ca44a  doris::vectorized::Scanner::_filter_output_block()
        @     0x5564119c72fd  doris::vectorized::Scanner::get_block()
        @     0x5564119c660f  doris::vectorized::Scanner::get_block_after_projects()
        @     0x5564119e26e4  doris::vectorized::ScannerScheduler::_scanner_scan()
    ```
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [x] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
        - [ ] Previous test can cover this change.
        - [ ] No code files have been changed.
        - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [x] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [x] No.
        - [ ] Yes. <!-- Add document PR link here. eg: https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
---
 .../nereids/load/NereidsLoadPlanInfoCollector.java |  15 +-
 .../broker_load/test_s3_load_with_where.out        |  18 ++
 .../broker_load/test_s3_load_with_where.groovy     | 205 +++++++++++++++++++++
 3 files changed, 237 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
index 0459c4349b0..aadf65dcdd4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
@@ -404,7 +404,7 @@ public class NereidsLoadPlanInfoCollector extends DefaultPlanVisitor<Void, PlanT
             // in visitLogicalProject, we set project exprs nullability same as dest table columns
             // the conjunct's nullability is based on project exprs, so we need clear the nullable info
             // and let conjunct calculate the nullability by itself to get the correct nullable info
-            expr.clearNullableFromNereids();
+            clearNullableFromNereidsRecursively(expr);
             loadPlanInfo.postFilterExprList.add(expr);
         }
         filterPredicate = logicalFilter.getPredicate();
@@ -423,6 +423,19 @@ public class NereidsLoadPlanInfoCollector extends DefaultPlanVisitor<Void, PlanT
         return null;
     }
 
+    /**
+     * Recursively clear nullable info from expression and all its children
+     */
+    private void clearNullableFromNereidsRecursively(Expr expr) {
+        if (expr == null) {
+            return;
+        }
+        expr.clearNullableFromNereids();
+        for (Expr child : expr.getChildren()) {
+            clearNullableFromNereidsRecursively(child);
+        }
+    }
+
     @Override
     public Void visitLogicalPreFilter(LogicalPreFilter<? extends Plan> logicalPreFilter,
             PlanTranslatorContext context) {
diff --git a/regression-test/data/load_p0/broker_load/test_s3_load_with_where.out b/regression-test/data/load_p0/broker_load/test_s3_load_with_where.out
new file mode 100644
index 00000000000..74078a91999
--- /dev/null
+++ b/regression-test/data/load_p0/broker_load/test_s3_load_with_where.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+1      2023-09-01      1       1       1
+11001  2023-09-01      1       1       10
+11001  2023-09-01      1       2       10
+11001  2023-09-01      1       3       10
+11001  2023-09-01      2       1       10
+11001  2023-09-01      2       2       10
+11001  2023-09-01      2       3       10
+
+-- !select --
+1      2023-09-01      1       1       1
+11001  2023-09-01      1       1       10
+11001  2023-09-01      1       2       10
+11001  2023-09-01      1       3       10
+11001  2023-09-01      2       1       10
+11001  2023-09-01      2       2       10
+
diff --git a/regression-test/suites/load_p0/broker_load/test_s3_load_with_where.groovy b/regression-test/suites/load_p0/broker_load/test_s3_load_with_where.groovy
new file mode 100644
index 00000000000..c54eca340b1
--- /dev/null
+++ b/regression-test/suites/load_p0/broker_load/test_s3_load_with_where.groovy
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_s3_load_with_where", "load_p0") {
+    // define a sql table
+    def testTable = "tbl_s3_broker_load_with_where"
+    
+    def create_test_table = {testTablex ->
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+                `k1` BIGINT NOT NULL,
+                `k2` DATE NULL,
+                `k3` INT(11) NOT NULL,
+                `k4` INT(11) NOT NULL,
+                `v5` BIGINT SUM NULL DEFAULT "0"
+            ) ENGINE=OLAP
+            AGGREGATE KEY(`k1`, `k2`, `k3`, `k4`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 16
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            );
+            """
+        
+        // DDL/DML return 1 row and 3 column, the only value is update row count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+        
+        // insert 1 row to check whether the table is ok
+        def result2 = sql """ INSERT INTO ${testTable} VALUES
+                        (1,'2023-09-01',1,1,1)
+                        """
+        assertTrue(result2.size() == 1)
+        assertTrue(result2[0].size() == 1)
+        assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+    }
+
+    def load_from_hdfs_norm = {testTablex, label, hdfsFilePath, format, brokerName, hdfsUser, hdfsPasswd ->
+        def result1= sql """
+                        LOAD LABEL ${label} (
+                            DATA INFILE("${hdfsFilePath}")
+                            INTO TABLE ${testTablex}
+                            COLUMNS TERMINATED BY ","
+                            FORMAT as "${format}"
+                        )
+                        with BROKER "${brokerName}" (
+                        "username"="${hdfsUser}",
+                        "password"="${hdfsPasswd}")
+                        PROPERTIES  (
+                        "timeout"="1200",
+                        "max_filter_ratio"="0.1");
+                        """
+
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+    }
+
+    def load_from_s3_norm = {testTablex, label, s3FilePath, format ->
+        def result1= sql """
+                        LOAD LABEL ${label} (
+                            DATA INFILE("${s3FilePath}")
+                            INTO TABLE ${testTablex}
+                            COLUMNS TERMINATED BY ","
+                            FORMAT AS "${format}"
+                        )
+                        WITH S3 (
+                            "AWS_ACCESS_KEY" = "${getS3AK()}",
+                            "AWS_SECRET_KEY" = "${getS3SK()}",
+                            "AWS_ENDPOINT" = "${getS3Endpoint()}",
+                            "AWS_REGION" = "${getS3Region()}",
+                            "PROVIDER" = "${getS3Provider()}"
+                        )
+                        PROPERTIES  (
+                        "timeout"="1200",
+                        "max_filter_ratio"="0.1");
+                        """
+
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+    }
+
+    def load_from_s3_with_or_predicate = {testTablex, label, s3FilePath, format ->
+        def result1= sql """
+                        LOAD LABEL ${label} (
+                            DATA INFILE("${s3FilePath}")
+                            INTO TABLE ${testTablex}
+                            COLUMNS TERMINATED BY ","
+                            FORMAT AS "${format}" 
+                            WHERE       
+                                k1 in (11001,11002)
+                                and (
+                                    k3 in (1)
+                                    or k4 in (1, 2)
+                                ) 
+                        )
+                        WITH S3 (
+                            "AWS_ACCESS_KEY" = "${getS3AK()}",
+                            "AWS_SECRET_KEY" = "${getS3SK()}",
+                            "AWS_ENDPOINT" = "${getS3Endpoint()}",
+                            "AWS_REGION" = "${getS3Region()}",
+                            "PROVIDER" = "${getS3Provider()}"
+                        )
+                        PROPERTIES  (
+                        "timeout"="1200",
+                        "max_filter_ratio"="0.1");
+                        """
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+    }
+    
+    def check_load_result = {checklabel, testTablex ->
+        def max_try_milli_secs = 10000
+        while(max_try_milli_secs) {
+            def result = sql "show load where label = '${checklabel}'"
+            if(result[0][2] == "FINISHED") {
+                //sql "sync"
+                qt_select "select * from ${testTablex} order by k1"
+                break
+            } else {
+                sleep(1000) // wait 1 second every time
+                max_try_milli_secs -= 1000
+                if(max_try_milli_secs <= 0) {
+                    assertEquals(1, 2)
+                }
+            }
+        }
+    }
+
+    def check_data_correct = {table_name ->
+        sql "sync"
+        // select the table and check whether the data is correct
+        qt_select "select k1,k3,k4,sum(v5) from ${table_name} group by k1,k3,k4 order by k1,k3,k4"
+    }
+
+    def s3_csv_file_path = "s3://${getS3BucketName()}/regression/load/data/broker_load_with_where.csv"
+
+    // case0: stream load csv data from local file system with where
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    create_test_table.call(testTable)
+
+    streamLoad {
+        table "${testTable}"
+
+        set 'column_separator', ','
+        set 'columns', 'k1,k2,k3,k4,v5'
+        set 'where', """       
+                            k1 in (11001,11002)
+                            and (
+                                k3 in (1)
+                                or k4 in (1, 2)
+                            ) 
+                    """ 
+        file 'broker_load_with_where.csv'
+        time 10000 // limit inflight 10s
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+            assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows
+                         + json.NumberFilteredRows)
+            assertEquals(5, json.NumberLoadedRows)
+            assertEquals(6, json.NumberTotalRows)
+        }
+    }
+ 
+    // case1: import csv data from s3 without where 
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    create_test_table.call(testTable)
+
+    def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+    load_from_s3_norm.call(testTable, test_load_label, s3_csv_file_path, "CSV")
+    
+    check_load_result.call(test_load_label, testTable)
+
+    // case2: import csv data from s3 with or predicate in where
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    create_test_table.call(testTable)
+
+    test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+    load_from_s3_with_or_predicate.call(testTable, test_load_label, s3_csv_file_path, "CSV")
+
+    check_load_result.call(test_load_label, testTable)
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
