This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f27070c2d13 [fix](load) fix load plan when using WHERE clause with
CompoundPredicate (#57128)
f27070c2d13 is described below
commit f27070c2d132b8995a2056d953476ad9c18b662c
Author: Xin Liao <[email protected]>
AuthorDate: Mon Oct 20 20:47:40 2025 +0800
[fix](load) fix load plan when using WHERE clause with CompoundPredicate
(#57128)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
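Changed the nullability clearing logic in
`NereidsLoadPlanInfoCollector.java`: instead of clearing the nullable
information only on the top-level post-filter expression, it is now
cleared recursively on the expression and all of its children. This
ensures correct nullability handling for complex filter expressions such
as a WHERE clause with a CompoundPredicate, which previously failed on
the backend with: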
```
F 2025-10-11 16:43:33,404 88673 status.h:466] Bad cast from
type:doris::vectorized::ColumnNullable* to
doris::vectorized::ColumnVector<(doris::PrimitiveType)2>*
*** Check failure stack trace: ***
@ 0x5564186efacf google::LogMessage::SendToLog()
@ 0x5564186e60e0 google::LogMessage::Flush()
@ 0x5564186e97d9 google::LogMessageFatal::~LogMessageFatal()
@ 0x5563fff5673c doris::Status::FatalError<>()
@ 0x55640b78d501
_ZZ11assert_castIPN5doris10vectorized12ColumnVectorILNS0_13PrimitiveTypeE2EEEL18TypeCheckOnRelease1EPNS1_7IColumnEET_OT1_ENKUlOS9_E_clIS8_EES5_SC_
@ 0x55640b78cd38 assert_cast<>()
@ 0x556411b8a62a
_ZZN5doris10vectorized13VCompoundPred7executeEPNS0_12VExprContextEPNS0_5BlockEPiENKUlTnbvE_clILb0EEEDav
@ 0x556411b8204d doris::vectorized::VCompoundPred::execute()
@ 0x556411c79d23 doris::vectorized::VExprContext::execute()
@ 0x556411c7fe9c
doris::vectorized::VExprContext::execute_conjuncts()
@ 0x556411c7ed6b
doris::vectorized::VExprContext::execute_conjuncts_and_filter_block()
@ 0x556411c7ea58 doris::vectorized::VExprContext::filter_block()
@ 0x5564119ca44a doris::vectorized::Scanner::_filter_output_block()
@ 0x5564119c72fd doris::vectorized::Scanner::get_block()
@ 0x5564119c660f
doris::vectorized::Scanner::get_block_after_projects()
@ 0x5564119e26e4
doris::vectorized::ScannerScheduler::_scanner_scan()
```
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [x] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [x] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [x] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
.../nereids/load/NereidsLoadPlanInfoCollector.java | 15 +-
.../broker_load/test_s3_load_with_where.out | 18 ++
.../broker_load/test_s3_load_with_where.groovy | 205 +++++++++++++++++++++
3 files changed, 237 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
index 0459c4349b0..aadf65dcdd4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java
@@ -404,7 +404,7 @@ public class NereidsLoadPlanInfoCollector extends
DefaultPlanVisitor<Void, PlanT
// in visitLogicalProject, we set project exprs nullability same
as dest table columns
// the conjunct's nullability is based on project exprs, so we
need clear the nullable info
// and let conjunct calculate the nullability by itself to get the
correct nullable info
- expr.clearNullableFromNereids();
+ clearNullableFromNereidsRecursively(expr);
loadPlanInfo.postFilterExprList.add(expr);
}
filterPredicate = logicalFilter.getPredicate();
@@ -423,6 +423,19 @@ public class NereidsLoadPlanInfoCollector extends
DefaultPlanVisitor<Void, PlanT
return null;
}
+    /**
+     * Clears the nullable info that Nereids attached to {@code expr} and to every
+     * expression in its subtree. A null {@code expr} is ignored.
+     *
+     * @param expr root of the expression tree to reset; may be null
+     */
+    private void clearNullableFromNereidsRecursively(Expr expr) {
+        if (expr != null) {
+            expr.clearNullableFromNereids();
+            expr.getChildren().forEach(this::clearNullableFromNereidsRecursively);
+        }
+    }
+
@Override
public Void visitLogicalPreFilter(LogicalPreFilter<? extends Plan>
logicalPreFilter,
PlanTranslatorContext context) {
diff --git
a/regression-test/data/load_p0/broker_load/test_s3_load_with_where.out
b/regression-test/data/load_p0/broker_load/test_s3_load_with_where.out
new file mode 100644
index 00000000000..74078a91999
--- /dev/null
+++ b/regression-test/data/load_p0/broker_load/test_s3_load_with_where.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+1 2023-09-01 1 1 1
+11001 2023-09-01 1 1 10
+11001 2023-09-01 1 2 10
+11001 2023-09-01 1 3 10
+11001 2023-09-01 2 1 10
+11001 2023-09-01 2 2 10
+11001 2023-09-01 2 3 10
+
+-- !select --
+1 2023-09-01 1 1 1
+11001 2023-09-01 1 1 10
+11001 2023-09-01 1 2 10
+11001 2023-09-01 1 3 10
+11001 2023-09-01 2 1 10
+11001 2023-09-01 2 2 10
+
diff --git
a/regression-test/suites/load_p0/broker_load/test_s3_load_with_where.groovy
b/regression-test/suites/load_p0/broker_load/test_s3_load_with_where.groovy
new file mode 100644
index 00000000000..c54eca340b1
--- /dev/null
+++ b/regression-test/suites/load_p0/broker_load/test_s3_load_with_where.groovy
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_s3_load_with_where", "load_p0") {
+ // define a sql table
+ def testTable = "tbl_s3_broker_load_with_where"
+
+ // Creates the test table named by `testTablex` (if it does not exist yet) and
+ // seeds it with a single row. Both statements use the closure parameter, so the
+ // helper works for any table name passed in, not only the suite-level `testTable`.
+ def create_test_table = {testTablex ->
+ def result1 = sql """
+ CREATE TABLE IF NOT EXISTS ${testTablex} (
+ `k1` BIGINT NOT NULL,
+ `k2` DATE NULL,
+ `k3` INT(11) NOT NULL,
+ `k4` INT(11) NOT NULL,
+ `v5` BIGINT SUM NULL DEFAULT "0"
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`k1`, `k2`, `k3`, `k4`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 16
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // DDL/DML return 1 row and 1 column, the only value is the updated row count
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+ // insert 1 row to check whether the table is ok
+ def result2 = sql """ INSERT INTO ${testTablex} VALUES
+ (1,'2023-09-01',1,1,1)
+ """
+ assertTrue(result2.size() == 1)
+ assertTrue(result2[0].size() == 1)
+ assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+ }
+
+ // Submits a LOAD LABEL statement that reads `hdfsFilePath` through the broker
+ // `brokerName` (authenticated with `hdfsUser` / `hdfsPasswd`) into `testTablex`,
+ // without a WHERE clause.
+ // NOTE(review): this closure is not called anywhere in this suite.
+ def load_from_hdfs_norm = {testTablex, label, hdfsFilePath, format,
brokerName, hdfsUser, hdfsPasswd ->
+ def result1= sql """
+ LOAD LABEL ${label} (
+ DATA INFILE("${hdfsFilePath}")
+ INTO TABLE ${testTablex}
+ COLUMNS TERMINATED BY ","
+ FORMAT as "${format}"
+ )
+ with BROKER "${brokerName}" (
+ "username"="${hdfsUser}",
+ "password"="${hdfsPasswd}")
+ PROPERTIES (
+ "timeout"="1200",
+ "max_filter_ratio"="0.1");
+ """
+
+ // The statement result is expected to be a single row with a single value 0.
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+ }
+
+ // Submits a LOAD LABEL statement that reads `s3FilePath` via WITH S3 into
+ // `testTablex`, without a WHERE clause. The S3 credentials and endpoint come
+ // from the getS3*() helpers. Only the submission is checked here; the job
+ // outcome is checked separately by check_load_result.
+ def load_from_s3_norm = {testTablex, label, s3FilePath, format ->
+ def result1= sql """
+ LOAD LABEL ${label} (
+ DATA INFILE("${s3FilePath}")
+ INTO TABLE ${testTablex}
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "${format}"
+ )
+ WITH S3 (
+ "AWS_ACCESS_KEY" = "${getS3AK()}",
+ "AWS_SECRET_KEY" = "${getS3SK()}",
+ "AWS_ENDPOINT" = "${getS3Endpoint()}",
+ "AWS_REGION" = "${getS3Region()}",
+ "PROVIDER" = "${getS3Provider()}"
+ )
+ PROPERTIES (
+ "timeout"="1200",
+ "max_filter_ratio"="0.1");
+ """
+
+ // The statement result is expected to be a single row with a single value 0.
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+ }
+
+ // Same S3 load as load_from_s3_norm, but with a WHERE clause that combines an
+ // IN predicate with a nested OR (`k1 in (...) and (k3 in (...) or k4 in (...))`).
+ // This is the compound-predicate shape the commit's fix targets.
+ def load_from_s3_with_or_predicate = {testTablex, label, s3FilePath,
format ->
+ def result1= sql """
+ LOAD LABEL ${label} (
+ DATA INFILE("${s3FilePath}")
+ INTO TABLE ${testTablex}
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "${format}"
+ WHERE
+ k1 in (11001,11002)
+ and (
+ k3 in (1)
+ or k4 in (1, 2)
+ )
+ )
+ WITH S3 (
+ "AWS_ACCESS_KEY" = "${getS3AK()}",
+ "AWS_SECRET_KEY" = "${getS3SK()}",
+ "AWS_ENDPOINT" = "${getS3Endpoint()}",
+ "AWS_REGION" = "${getS3Region()}",
+ "PROVIDER" = "${getS3Provider()}"
+ )
+ PROPERTIES (
+ "timeout"="1200",
+ "max_filter_ratio"="0.1");
+ """
+ // The statement result is expected to be a single row with a single value 0.
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Query OK, 0 rows affected")
+ }
+
+    // Polls `show load` for `checklabel` once per second, for at most 10 seconds.
+    // When the job reports FINISHED, the content of `testTablex` is compared with
+    // the expected output via qt_select. The check fails immediately if the job is
+    // CANCELLED, and fails with a timeout message if it never finishes in time.
+    def check_load_result = {checklabel, testTablex ->
+        def max_try_milli_secs = 10000
+        while (max_try_milli_secs > 0) {
+            def result = sql "show load where label = '${checklabel}'"
+            // The job may not be listed yet; treat an empty result as "not finished".
+            def state = result.size() > 0 ? result[0][2] : null
+            if (state == "FINISHED") {
+                //sql "sync"
+                // Order by every key column: k1 alone has ties, which would leave the
+                // row order (and therefore the .out comparison) non-deterministic.
+                qt_select "select * from ${testTablex} order by k1, k2, k3, k4"
+                return
+            }
+            // A cancelled job never reaches FINISHED, so stop waiting right away.
+            assertTrue(state != "CANCELLED", "Load ${checklabel} was cancelled: ${result}".toString())
+            sleep(1000) // wait 1 second every time
+            max_try_milli_secs -= 1000
+        }
+        assertTrue(false, "Load ${checklabel} did not finish within 10 seconds".toString())
+    }
+
+ // Issues `sync` and then compares the per-(k1, k3, k4) sum of v5 in `table_name`
+ // with the expected output via qt_select.
+ // NOTE(review): this closure is not called anywhere in this suite.
+ def check_data_correct = {table_name ->
+ sql "sync"
+ // select the table and check whether the data is correct
+ qt_select "select k1,k3,k4,sum(v5) from ${table_name} group by
k1,k3,k4 order by k1,k3,k4"
+ }
+
+ // S3 location of the CSV file used by the S3 load cases (case1 and case2).
+ def s3_csv_file_path =
"s3://${getS3BucketName()}/regression/load/data/broker_load_with_where.csv"
+
+ // case0: stream load csv data from local file system with where
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ create_test_table.call(testTable)
+
+ streamLoad {
+ table "${testTable}"
+
+ set 'column_separator', ','
+ set 'columns', 'k1,k2,k3,k4,v5'
+ // Same compound predicate as the S3 case: an IN combined with a nested OR.
+ set 'where', """
+ k1 in (11001,11002)
+ and (
+ k3 in (1)
+ or k4 in (1, 2)
+ )
+ """
+ file 'broker_load_with_where.csv'
+ time 10000 // limit inflight 10s
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ // Every row must be accounted for as loaded, unselected or filtered.
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows +
json.NumberUnselectedRows
+ + json.NumberFilteredRows)
+ // 5 of the 6 rows in the file are expected to be loaded.
+ assertEquals(5, json.NumberLoadedRows)
+ assertEquals(6, json.NumberTotalRows)
+ }
+ }
+
+ // case1: import csv data from s3 without where
+ // The table is recreated first, so it holds only the seed row before the load.
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ create_test_table.call(testTable)
+
+ // A dash-free random UUID serves as the load label.
+ def test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+ load_from_s3_norm.call(testTable, test_load_label, s3_csv_file_path, "CSV")
+
+ // Waits for the job and compares the table with the first `select` block of the .out file.
+ check_load_result.call(test_load_label, testTable)
+
+ // case2: import csv data from s3 with or predicate in where
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ create_test_table.call(testTable)
+
+ // A new label is generated for the second load job.
+ test_load_label = UUID.randomUUID().toString().replaceAll("-", "")
+ load_from_s3_with_or_predicate.call(testTable, test_load_label,
s3_csv_file_path, "CSV")
+
+ // Waits for the job and compares the table with the second `select` block of the .out file.
+ check_load_result.call(test_load_label, testTable)
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]