This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8d74176 [Optimize] Check invalid datetime to avoid scanning a lots of
partitions (#5643)
8d74176 is described below
commit 8d7417697035ea2972769ccf8f820dcf479bfd39
Author: xinghuayu007 <[email protected]>
AuthorDate: Wed May 19 09:25:58 2021 +0800
[Optimize] Check invalid datetime to avoid scanning a lots of partitions
(#5643)
Support parsing date format `'%Y-%m-%d %H:%i' and '%Y-%m-%d %H'
Support handling date time with nanoseconds
---
.../java/org/apache/doris/analysis/Analyzer.java | 2 +
.../org/apache/doris/analysis/DateLiteral.java | 15 ++-
...implifyInvalidDateBinaryPredicatesDateRule.java | 63 ++++++++++++
.../org/apache/doris/analysis/DateLiteralTest.java | 14 +++
.../org/apache/doris/planner/QueryPlanTest.java | 110 ++++++++++++++++++++-
5 files changed, 202 insertions(+), 2 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
index dabf2f5..e5f872f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
@@ -41,6 +41,7 @@ import org.apache.doris.rewrite.ExprRewriter;
import org.apache.doris.rewrite.FoldConstantsRule;
import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule;
+import org.apache.doris.rewrite.SimplifyInvalidDateBinaryPredicatesDateRule;
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap;
import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule;
import org.apache.doris.rewrite.mvrewrite.CountFieldToSum;
@@ -257,6 +258,7 @@ public class Analyzer {
rules.add(NormalizeBinaryPredicatesRule.INSTANCE);
rules.add(FoldConstantsRule.INSTANCE);
rules.add(RewriteFromUnixTimeRule.INSTANCE);
+ rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE);
exprRewriter_ = new ExprRewriter(rules);
// init mv rewriter
List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
index 4025c61..eceef37 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
@@ -63,8 +63,12 @@ public class DateLiteral extends LiteralExpr {
private static final int DATEKEY_LENGTH = 8;
private static final int MAX_MICROSECOND = 999999;
+ private static final int DATETIME_TO_MINUTE_STRING_LENGTH = 16;
+ private static final int DATETIME_TO_HOUR_STRING_LENGTH = 13;
private static DateTimeFormatter DATE_TIME_FORMATTER = null;
+ private static DateTimeFormatter DATE_TIME_FORMATTER_TO_HOUR = null;
+ private static DateTimeFormatter DATE_TIME_FORMATTER_TO_MINUTE = null;
private static DateTimeFormatter DATE_FORMATTER = null;
/*
* Dates containing two-digit year values are ambiguous because the
century is unknown.
@@ -90,6 +94,8 @@ public class DateLiteral extends LiteralExpr {
static {
try {
DATE_TIME_FORMATTER = formatBuilder("%Y-%m-%d
%H:%i:%s").toFormatter();
+ DATE_TIME_FORMATTER_TO_HOUR = formatBuilder("%Y-%m-%d
%H").toFormatter();
+ DATE_TIME_FORMATTER_TO_MINUTE = formatBuilder("%Y-%m-%d
%H:%i").toFormatter();
DATE_FORMATTER = formatBuilder("%Y-%m-%d").toFormatter();
DATEKEY_FORMATTER = formatBuilder("%Y%m%d").toFormatter();
DATE_TIME_FORMATTER_TWO_DIGIT = formatBuilder("%y-%m-%d
%H:%i:%s").toFormatter();
@@ -268,7 +274,14 @@ public class DateLiteral extends LiteralExpr {
if (s.split("-")[0].length() == 2) {
dateTime =
DATE_TIME_FORMATTER_TWO_DIGIT.parseLocalDateTime(s);
} else {
- dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
+ // parse format '%Y-%m-%d %H:%i' and '%Y-%m-%d %H'
+ if (s.length() == DATETIME_TO_MINUTE_STRING_LENGTH) {
+ dateTime =
DATE_TIME_FORMATTER_TO_MINUTE.parseLocalDateTime(s);
+ } else if (s.length() == DATETIME_TO_HOUR_STRING_LENGTH) {
+ dateTime =
DATE_TIME_FORMATTER_TO_HOUR.parseLocalDateTime(s);
+ } else {
+ dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
+ }
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java
b/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java
new file mode 100644
index 0000000..c049f96
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.rewrite;
+
+import org.apache.doris.analysis.Analyzer;
+import org.apache.doris.analysis.BinaryPredicate;
+import org.apache.doris.analysis.CastExpr;
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.NullLiteral;
+import org.apache.doris.common.AnalysisException;
+
+/**
+ * this rule try to convert date expression, if date is invalid, it will be
+ * converted into null literal to avoid to scan all partitions
+ * if a date data is invalid or contains nanosecond, it will be convert into
CastExpr
+ * only support rewriting pattern: slot + operator + date literal
+ * Examples:
+ * date = "2020-10-32" => NULL
+ */
+public class SimplifyInvalidDateBinaryPredicatesDateRule implements
ExprRewriteRule {
+ public static ExprRewriteRule INSTANCE = new
SimplifyInvalidDateBinaryPredicatesDateRule();
+ public static final int DATETIME_STRING_MAX_LENGTH = new
String("yyyy-MM-dd HH:ii:ss").length();
+
+ @Override
+ public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+ if (!(expr instanceof BinaryPredicate)) return expr;
+ Expr lchild = expr.getChild(0);
+ if (!lchild.getType().isDateType()) {
+ return expr;
+ }
+ Expr valueExpr = expr.getChild(1);
+ if (!valueExpr.getType().isDateType()) {
+ return expr;
+ }
+ if (!valueExpr.isConstant()) {
+ return expr;
+ }
+ if (valueExpr instanceof CastExpr) {
+ String dateStr = valueExpr.toSql();
+ // if it contains millisecond, microsecond, nanosecond, do nothing
+ if (dateStr.length() > DATETIME_STRING_MAX_LENGTH &&
dateStr.contains(".")) {
+ return expr;
+ }
+ return new NullLiteral();
+ }
+ return expr;
+ }
+}
\ No newline at end of file
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
index 9b94f9d..b55ebc8 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
@@ -59,6 +59,20 @@ public class DateLiteralTest {
}
@Test
+ public void testParseDateTimeToHourORMinute() throws Exception{
+ String s = "2020-12-13 12:13:14";
+ Type type = Type.DATETIME;
+ DateLiteral literal = new DateLiteral(s, type);
+ Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:14"));
+ s = "2020-12-13 12:13";
+ literal = new DateLiteral(s, type);
+ Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:00"));
+ s = "2020-12-13 12";
+ literal = new DateLiteral(s, type);
+ Assert.assertTrue(literal.toSql().contains("2020-12-13 12:00:00"));
+ }
+
+ @Test
public void uncheckedCastTo() {
boolean hasException = false;
try {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
index 3ce7198..f334377 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
@@ -1467,9 +1467,117 @@ public class QueryPlanTest {
//format less than
sql = "select * from test1 where from_unixtime(query_time,
'yyyy-MM-dd') < '2021-03-02 10:01:28'";
explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
- System.out.println("wangxixu-explain:"+explainString);
Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <
1614614400, `query_time` >= 0"));
}
+ @Test
+ public void testCheckInvalidDate() throws Exception {
+ FeConstants.runningUnitTest = true;
+ connectContext.setDatabase("default_cluster:test");
+ //valid date
+ String sql = "select day from tbl_int_date where day = '2020-10-30'";
+ String explainString =
UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `day` =
'2020-10-30 00:00:00'"));
+ sql = "select day from tbl_int_date where day =
from_unixtime(1196440219)";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `day` =
'2007-12-01 00:30:19'"));
+ sql = "select day from tbl_int_date where day =
str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s');";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `day` =
'2014-12-21 12:34:56'"));
+ //valid date
+ sql = "select day from tbl_int_date where day = 20201030";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `day` =
'2020-10-30 00:00:00'"));
+ //valid date
+ sql = "select day from tbl_int_date where day = '20201030'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `day` =
'2020-10-30 00:00:00'"));
+ //valid date contains micro second
+ sql = "select day from tbl_int_date where day = '2020-10-30
10:00:01.111111'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `day` =
'2020-10-30 10:00:01.111111'"));
+ //invalid date
+ sql = "select day from tbl_int_date where day = '2020-10-32'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid date
+ sql = "select day from tbl_int_date where day = '20201032'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid date
+ sql = "select day from tbl_int_date where day = 20201032";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid date
+ sql = "select day from tbl_int_date where day = 'hello'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid date
+ sql = "select day from tbl_int_date where day = 2020-10-30";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid date
+ sql = "select day from tbl_int_date where day = 10-30";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+
+
+ //valid datetime
+ sql = "select day from tbl_int_date where date = '2020-10-30
12:12:30'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `date` =
'2020-10-30 12:12:30'"));
+ //valid datetime, support parsing to minute
+ sql = "select day from tbl_int_date where date = '2020-10-30 12:12'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `date` =
'2020-10-30 12:12:00'"));
+ //valid datetime, support parsing to hour
+ sql = "select day from tbl_int_date where date = '2020-10-30 12'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `date` =
'2020-10-30 12:00:00'"));
+ //valid datetime
+ sql = "select day from tbl_int_date where date = 20201030";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `date` =
'2020-10-30 00:00:00'"));
+ //valid datetime
+ sql = "select day from tbl_int_date where date = '20201030'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `date` =
'2020-10-30 00:00:00'"));
+ //valid datetime
+ sql = "select day from tbl_int_date where date = '2020-10-30'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `date` =
'2020-10-30 00:00:00'"));
+ //valid datetime contains micro second
+ sql = "select day from tbl_int_date where date = '2020-10-30
10:00:01.111111'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("PREDICATES: `date` =
'2020-10-30 10:00:01.111111'"));
+ //invalid datetime
+ sql = "select day from tbl_int_date where date = '2020-10-32'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid datetime
+ sql = "select day from tbl_int_date where date = 'hello'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid datetime
+ sql = "select day from tbl_int_date where date = 2020-10-30";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid datetime
+ sql = "select day from tbl_int_date where date = 10-30";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid datetime
+ sql = "select day from tbl_int_date where date = '2020-10-12
12:23:76'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //invalid datetime with timestamp
+ sql = "select day from tbl_int_date where date = '1604031150'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ //valid datetime with timestamp in micro second
+ sql = "select day from tbl_int_date where date = '1604031150000'";
+ explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext,
"EXPLAIN " + sql);
+ Assert.assertTrue(explainString.contains("NULL"));
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]