This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 6de8e4be862 [fix] (nereids) fix Match Expression in filter estimation
(#39050) (#39216)
6de8e4be862 is described below
commit 6de8e4be8621f06ada8a3b560322cca363f431ba
Author: Sun Chenyang <[email protected]>
AuthorDate: Tue Aug 13 10:55:41 2024 +0800
[fix] (nereids) fix Match Expression in filter estimation (#39050) (#39216)
## Proposed changes
pick from master #39050
---
.../doris/nereids/stats/FilterEstimation.java | 7 ++-
.../data/inverted_index_p0/test_or_not_match.out | 5 ++
.../inverted_index_p0/test_or_not_match.groovy | 69 ++++++++++++++++++++++
3 files changed, 80 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 0ce10ec0c3c..b4b4fa5e3f8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.expressions.IsNull;
import org.apache.doris.nereids.trees.expressions.LessThan;
import org.apache.doris.nereids.trees.expressions.LessThanEqual;
import org.apache.doris.nereids.trees.expressions.Like;
+import org.apache.doris.nereids.trees.expressions.Match;
import org.apache.doris.nereids.trees.expressions.Not;
import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
import org.apache.doris.nereids.trees.expressions.Or;
@@ -480,7 +481,8 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
child instanceof EqualPredicate
|| child instanceof InPredicate
|| child instanceof IsNull
- || child instanceof Like,
+ || child instanceof Like
+ || child instanceof Match,
"Not-predicate meet unexpected child: %s",
child.toSql());
if (child instanceof Like) {
rowCount = context.statistics.getRowCount() -
childStats.getRowCount();
@@ -503,6 +505,9 @@ public class FilterEstimation extends
ExpressionVisitor<Statistics, EstimationCo
.setMinExpr(originColStats.minExpr)
.setMaxValue(originColStats.maxValue)
.setMaxExpr(originColStats.maxExpr);
+ } else if (child instanceof Match) {
+ rowCount = context.statistics.getRowCount() -
childStats.getRowCount();
+ colBuilder.setNdv(Math.max(1.0, originColStats.ndv -
childColStats.ndv));
}
if (not.child().getInputSlots().size() == 1 && !(child
instanceof IsNull)) {
// only consider the single column numNull, otherwise,
ignore
diff --git a/regression-test/data/inverted_index_p0/test_or_not_match.out
b/regression-test/data/inverted_index_p0/test_or_not_match.out
new file mode 100644
index 00000000000..22dde7a8bf7
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_or_not_match.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+GET / HTTP/1.0
+GET / HTTP/1.0
+
diff --git a/regression-test/suites/inverted_index_p0/test_or_not_match.groovy
b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy
new file mode 100644
index 00000000000..95af26480c9
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_or_not_match", "p0") {
+ def tableName = "test_or_not_match"
+ sql "DROP TABLE IF EXISTS ${tableName}"
+ sql """
+ CREATE TABLE ${tableName} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` varchar(20) NULL COMMENT "",
+ `request` text NULL COMMENT "",
+ `status` int(11) NULL COMMENT "",
+ `size` int(11) NULL COMMENT "",
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ // load the json data
+ streamLoad {
+ table "${tableName}"
+
+ // set http request header params
+ set 'read_json_by_line', 'true'
+ set 'format', 'json'
+ file 'documents-1000.json' // import json file
+ time 10000 // limit inflight 10s
+
+ // if a check callback is declared, the default check condition is ignored,
+ // so you must check every condition yourself
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+ for (int i = 0; i < 10; i++) {
+ sql "select request from ${tableName} where request like '1.0' or not
request MATCH 'GETA';"
+ }
+
+ sql "set enable_nereids_planner = true"
+ sql "set enable_fallback_to_original_planner = false"
+
+ qt_sql "select request from ${tableName} where request like '1.0' or not
request MATCH 'GETA' order by request limit 2;"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]