This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d74176  [Optimize] Check invalid datetime to avoid scanning a lots of 
partitions (#5643)
8d74176 is described below

commit 8d7417697035ea2972769ccf8f820dcf479bfd39
Author: xinghuayu007 <[email protected]>
AuthorDate: Wed May 19 09:25:58 2021 +0800

    [Optimize] Check invalid datetime to avoid scanning a lots of partitions 
(#5643)
    
    Support parsing date format `'%Y-%m-%d %H:%i' and '%Y-%m-%d %H'
    Support handling date time with nanoseconds
---
 .../java/org/apache/doris/analysis/Analyzer.java   |   2 +
 .../org/apache/doris/analysis/DateLiteral.java     |  15 ++-
 ...implifyInvalidDateBinaryPredicatesDateRule.java |  63 ++++++++++++
 .../org/apache/doris/analysis/DateLiteralTest.java |  14 +++
 .../org/apache/doris/planner/QueryPlanTest.java    | 110 ++++++++++++++++++++-
 5 files changed, 202 insertions(+), 2 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
index dabf2f5..e5f872f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
@@ -41,6 +41,7 @@ import org.apache.doris.rewrite.ExprRewriter;
 import org.apache.doris.rewrite.FoldConstantsRule;
 import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
 import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule;
+import org.apache.doris.rewrite.SimplifyInvalidDateBinaryPredicatesDateRule;
 import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap;
 import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule;
 import org.apache.doris.rewrite.mvrewrite.CountFieldToSum;
@@ -257,6 +258,7 @@ public class Analyzer {
             rules.add(NormalizeBinaryPredicatesRule.INSTANCE);
             rules.add(FoldConstantsRule.INSTANCE);
             rules.add(RewriteFromUnixTimeRule.INSTANCE);
+            rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE);
             exprRewriter_ = new ExprRewriter(rules);
             // init mv rewriter
             List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
index 4025c61..eceef37 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
@@ -63,8 +63,12 @@ public class DateLiteral extends LiteralExpr {
 
     private static final int DATEKEY_LENGTH = 8;
     private static final int MAX_MICROSECOND = 999999;
+    private static final int DATETIME_TO_MINUTE_STRING_LENGTH = 16;
+    private static final int DATETIME_TO_HOUR_STRING_LENGTH = 13;
 
     private static DateTimeFormatter DATE_TIME_FORMATTER = null;
+    private static DateTimeFormatter DATE_TIME_FORMATTER_TO_HOUR = null;
+    private static DateTimeFormatter DATE_TIME_FORMATTER_TO_MINUTE = null;
     private static DateTimeFormatter DATE_FORMATTER = null;
     /* 
      * Dates containing two-digit year values are ambiguous because the 
century is unknown. 
@@ -90,6 +94,8 @@ public class DateLiteral extends LiteralExpr {
     static {
         try {
             DATE_TIME_FORMATTER = formatBuilder("%Y-%m-%d 
%H:%i:%s").toFormatter();
+            DATE_TIME_FORMATTER_TO_HOUR = formatBuilder("%Y-%m-%d 
%H").toFormatter();
+            DATE_TIME_FORMATTER_TO_MINUTE = formatBuilder("%Y-%m-%d 
%H:%i").toFormatter();
             DATE_FORMATTER = formatBuilder("%Y-%m-%d").toFormatter();
             DATEKEY_FORMATTER = formatBuilder("%Y%m%d").toFormatter();
             DATE_TIME_FORMATTER_TWO_DIGIT = formatBuilder("%y-%m-%d 
%H:%i:%s").toFormatter();
@@ -268,7 +274,14 @@ public class DateLiteral extends LiteralExpr {
                 if (s.split("-")[0].length() == 2) {
                     dateTime = 
DATE_TIME_FORMATTER_TWO_DIGIT.parseLocalDateTime(s);
                 } else {
-                    dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
+                    // parse format '%Y-%m-%d %H:%i' and '%Y-%m-%d %H'
+                    if (s.length() == DATETIME_TO_MINUTE_STRING_LENGTH) {
+                        dateTime = 
DATE_TIME_FORMATTER_TO_MINUTE.parseLocalDateTime(s);
+                    } else if (s.length() == DATETIME_TO_HOUR_STRING_LENGTH) {
+                        dateTime = 
DATE_TIME_FORMATTER_TO_HOUR.parseLocalDateTime(s);
+                    } else {
+                        dateTime = DATE_TIME_FORMATTER.parseLocalDateTime(s);
+                    }
                 }
             }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java
 
b/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java
new file mode 100644
index 0000000..c049f96
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/rewrite/SimplifyInvalidDateBinaryPredicatesDateRule.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.rewrite;
+
+import org.apache.doris.analysis.Analyzer;
+import org.apache.doris.analysis.BinaryPredicate;
+import org.apache.doris.analysis.CastExpr;
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.NullLiteral;
+import org.apache.doris.common.AnalysisException;
+
+/**
+ * this rule try to convert date expression, if date is invalid, it will be
+ * converted into null literal to avoid to scan all partitions
+ * if a date data is invalid or contains nanosecond, it will be convert into 
CastExpr
+ * only support rewriting pattern: slot + operator + date literal
+ * Examples:
+ * date = "2020-10-32" => NULL
+ */
+public class SimplifyInvalidDateBinaryPredicatesDateRule implements 
ExprRewriteRule {
+    public static ExprRewriteRule INSTANCE = new 
SimplifyInvalidDateBinaryPredicatesDateRule();
+    public static final int DATETIME_STRING_MAX_LENGTH = new 
String("yyyy-MM-dd HH:ii:ss").length();
+
+    @Override
+    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+        if (!(expr instanceof BinaryPredicate)) return expr;
+        Expr lchild = expr.getChild(0);
+        if (!lchild.getType().isDateType()) {
+            return expr;
+        }
+        Expr valueExpr = expr.getChild(1);
+        if (!valueExpr.getType().isDateType()) {
+            return expr;
+        }
+        if (!valueExpr.isConstant()) {
+            return expr;
+        }
+        if (valueExpr instanceof CastExpr) {
+            String dateStr = valueExpr.toSql();
+            // if it contains millisecond, microsecond, nanosecond, do nothing
+            if (dateStr.length() > DATETIME_STRING_MAX_LENGTH && 
dateStr.contains(".")) {
+                return expr;
+            }
+            return new NullLiteral();
+        }
+        return expr;
+    }
+}
\ No newline at end of file
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
index 9b94f9d..b55ebc8 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
@@ -59,6 +59,20 @@ public class DateLiteralTest {
     }
 
     @Test
+    public void testParseDateTimeToHourORMinute() throws Exception{
+        String s = "2020-12-13 12:13:14";
+        Type type = Type.DATETIME;
+        DateLiteral literal = new DateLiteral(s, type);
+        Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:14"));
+        s = "2020-12-13 12:13";
+        literal = new DateLiteral(s, type);
+        Assert.assertTrue(literal.toSql().contains("2020-12-13 12:13:00"));
+        s = "2020-12-13 12";
+        literal = new DateLiteral(s, type);
+        Assert.assertTrue(literal.toSql().contains("2020-12-13 12:00:00"));
+    }
+
+    @Test
     public void uncheckedCastTo() {
         boolean hasException = false;
         try {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
index 3ce7198..f334377 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java
@@ -1467,9 +1467,117 @@ public class QueryPlanTest {
         //format less than
         sql = "select * from test1 where from_unixtime(query_time, 
'yyyy-MM-dd') < '2021-03-02 10:01:28'";
         explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
-        System.out.println("wangxixu-explain:"+explainString);
         Assert.assertTrue(explainString.contains("PREDICATES: `query_time` < 
1614614400, `query_time` >= 0"));
 
     }
 
+    @Test
+    public void testCheckInvalidDate() throws Exception {
+        FeConstants.runningUnitTest = true;
+        connectContext.setDatabase("default_cluster:test");
+        //valid date
+        String sql = "select day from tbl_int_date where day = '2020-10-30'";
+        String explainString = 
UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `day` = 
'2020-10-30 00:00:00'"));
+        sql = "select day from tbl_int_date where day = 
from_unixtime(1196440219)";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `day` = 
'2007-12-01 00:30:19'"));
+        sql = "select day from tbl_int_date where day = 
str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s');";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `day` = 
'2014-12-21 12:34:56'"));
+        //valid date
+        sql = "select day from tbl_int_date where day = 20201030";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `day` = 
'2020-10-30 00:00:00'"));
+        //valid date
+        sql = "select day from tbl_int_date where day = '20201030'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `day` = 
'2020-10-30 00:00:00'"));
+        //valid date contains micro second
+        sql = "select day from tbl_int_date where day = '2020-10-30 
10:00:01.111111'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `day` = 
'2020-10-30 10:00:01.111111'"));
+        //invalid date
+        sql = "select day from tbl_int_date where day = '2020-10-32'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid date
+        sql = "select day from tbl_int_date where day = '20201032'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid date
+        sql = "select day from tbl_int_date where day = 20201032";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid date
+        sql = "select day from tbl_int_date where day = 'hello'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid date
+        sql = "select day from tbl_int_date where day = 2020-10-30";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid date
+        sql = "select day from tbl_int_date where day = 10-30";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+
+
+        //valid datetime
+        sql = "select day from tbl_int_date where date = '2020-10-30 
12:12:30'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `date` = 
'2020-10-30 12:12:30'"));
+        //valid datetime, support parsing to minute
+        sql = "select day from tbl_int_date where date = '2020-10-30 12:12'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `date` = 
'2020-10-30 12:12:00'"));
+        //valid datetime, support parsing to hour
+        sql = "select day from tbl_int_date where date = '2020-10-30 12'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `date` = 
'2020-10-30 12:00:00'"));
+        //valid datetime
+        sql = "select day from tbl_int_date where date = 20201030";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `date` = 
'2020-10-30 00:00:00'"));
+        //valid datetime
+        sql = "select day from tbl_int_date where date = '20201030'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `date` = 
'2020-10-30 00:00:00'"));
+        //valid datetime
+        sql = "select day from tbl_int_date where date = '2020-10-30'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `date` = 
'2020-10-30 00:00:00'"));
+        //valid datetime contains micro second
+        sql = "select day from tbl_int_date where date = '2020-10-30 
10:00:01.111111'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("PREDICATES: `date` = 
'2020-10-30 10:00:01.111111'"));
+        //invalid datetime
+        sql = "select day from tbl_int_date where date = '2020-10-32'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid datetime
+        sql = "select day from tbl_int_date where date = 'hello'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid datetime
+        sql = "select day from tbl_int_date where date = 2020-10-30";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid datetime
+        sql = "select day from tbl_int_date where date = 10-30";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid datetime
+        sql = "select day from tbl_int_date where date = '2020-10-12 
12:23:76'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //invalid datetime with timestamp
+        sql = "select day from tbl_int_date where date = '1604031150'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+        //valid datetime with timestamp in micro second
+        sql = "select day from tbl_int_date where date = '1604031150000'";
+        explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, 
"EXPLAIN " + sql);
+        Assert.assertTrue(explainString.contains("NULL"));
+    }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to