This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 5758be65ecb branch-4.0: [Enhancement](cast) Support forward slash for
datetime strict-mode parsing in date part #56437 (#56508)
5758be65ecb is described below
commit 5758be65ecbbd162a555f90c54a8077ce3f60a5d
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Sep 27 09:55:27 2025 +0800
branch-4.0: [Enhancement](cast) Support forward slash for datetime strict-mode
parsing in date part #56437 (#56508)
Cherry-picked from #56437
Co-authored-by: zclllyybb <[email protected]>
---
be/src/util/string_parser.hpp | 6 +++---
be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp | 11 ++++++-----
be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp | 10 +++++-----
be/src/vec/functions/cast/cast_to_datev2_impl.hpp | 10 +++++-----
be/test/vec/function/cast/cast_to_date_test.cpp | 7 ++++---
be/test/vec/function/cast/cast_to_datetime_test.cpp | 8 +++++---
.../nereids/trees/expressions/literal/StringLikeLiteral.java | 4 ++--
regression-test/data/cast_p0/cast_to_datetime.out | 6 ++++++
.../expression/fold_constant/fe_constant_cast_to_date.out | 3 +++
.../expression/fold_constant/fe_constant_cast_to_datetime.out | 3 +++
regression-test/suites/cast_p0/cast_to_datetime.groovy | 9 +++++++++
.../expression/fold_constant/fe_constant_cast_to_date.groovy | 5 +----
.../fold_constant/fe_constant_cast_to_datetime.groovy | 5 +----
13 files changed, 53 insertions(+), 34 deletions(-)
diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index a4d7fa4b245..88e3ff031c0 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -154,10 +154,10 @@ inline bool is_delimiter(char c) {
}
inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>;
-inline bool is_bar(char c) {
- return c == '-';
+inline bool is_date_sep(char c) {
+ return c == '-' || c == '/';
}
-inline auto consume_one_bar = skip_qualified_char<1, is_bar>;
+inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>;
inline bool is_colon(char c) {
return c == ':';
diff --git a/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
b/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
index b03140dfdad..a003a1c4069 100644
--- a/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
+++ b/be/src/vec/functions/cast/cast_to_date_or_datetime_impl.hpp
@@ -25,6 +25,7 @@
#include "common/status.h"
#include "runtime/primitive_type.h"
#include "util/asan_util.h"
+#include "util/string_parser.hpp"
#include "vec/core/types.h"
#include "vec/data_types/data_type_decimal.h" // IWYU pragma: keep
#include "vec/data_types/serde/data_type_serde.h"
@@ -279,7 +280,7 @@ inline bool CastToDateOrDatetime::from_integer(T input,
VecDateTimeValue& val,
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
-<date> ::= <year> "-" <month1> "-" <day1>
+<date> ::= <year> ("-" | "/") <month1> ("-" | "/") <day1>
| <year> <month2> <day2>
<year> ::= <digit>{2} | <digit>{4} ; 1970 为界
@@ -428,13 +429,13 @@ inline bool
CastToDateOrDatetime::from_string_strict_mode(const StringRef& str,
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN(in_bound(ptr, end, 0), "too short date part,
got '{}'",
std::string {ptr, end});
- if (*ptr == '-') {
+ if (is_date_sep(*ptr)) {
// 2 digits year
++ptr; // consume one bar
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[1])),
"failed to consume 1 or 2 digits for
month, got {}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after month,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[2])),
@@ -455,13 +456,13 @@ inline bool
CastToDateOrDatetime::from_string_strict_mode(const StringRef& str,
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end,
part[1])),
"failed to consume 4 digits for year, got
{}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after year,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[2])),
"failed to consume 1 or 2 digits for
month, got {}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after month,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[3])),
diff --git a/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
b/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
index b21c6defbf0..3a0986bbbcc 100644
--- a/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
+++ b/be/src/vec/functions/cast/cast_to_datetimev2_impl.hpp
@@ -270,7 +270,7 @@ inline bool CastToDatetimeV2::from_integer(T input,
DateV2Value<DateTimeV2ValueT
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
-<date> ::= <year> "-" <month1> "-" <day1>
+<date> ::= <year> ("-" | "/") <month1> ("-" | "/") <day1>
| <year> <month2> <day2>
<year> ::= <digit>{2} | <digit>{4} ; 1970 为界
@@ -418,13 +418,13 @@ inline bool
CastToDatetimeV2::from_string_strict_mode(const StringRef& str,
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN(in_bound(ptr, end, 0), "too short date part,
got '{}'",
std::string {ptr, end});
- if (*ptr == '-') {
+ if (is_date_sep(*ptr)) {
// 2 digits year
++ptr; // consume one bar
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[1])),
"failed to consume 1 or 2 digits for
month, got {}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after month,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[2])),
@@ -445,13 +445,13 @@ inline bool
CastToDatetimeV2::from_string_strict_mode(const StringRef& str,
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end,
part[1])),
"failed to consume 4 digits for year, got
{}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after year,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[2])),
"failed to consume 1 or 2 digits for
month, got {}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after month,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[3])),
diff --git a/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
b/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
index 3ebe4fc2b75..ec738268aad 100644
--- a/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
+++ b/be/src/vec/functions/cast/cast_to_datev2_impl.hpp
@@ -210,7 +210,7 @@ inline bool CastToDateV2::from_integer(T input,
DateV2Value<DateV2ValueType>& va
––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––
-<date> ::= <year> "-" <month1> "-" <day1>
+<date> ::= <year> ("-" | "/") <month1> ("-" | "/") <day1>
| <year> <month2> <day2>
<year> ::= <digit>{2} | <digit>{4} ; 1970 为界
@@ -358,13 +358,13 @@ inline bool CastToDateV2::from_string_strict_mode(const
StringRef& str,
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN(in_bound(ptr, end, 0), "too short date part,
got '{}'",
std::string {ptr, end});
- if (*ptr == '-') {
+ if (is_date_sep(*ptr)) {
// 2 digits year
++ptr; // consume one bar
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[1])),
"failed to consume 1 or 2 digits for
month, got {}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after month,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[2])),
@@ -385,13 +385,13 @@ inline bool CastToDateV2::from_string_strict_mode(const
StringRef& str,
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end,
part[1])),
"failed to consume 4 digits for year, got
{}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after year,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[2])),
"failed to consume 1 or 2 digits for
month, got {}",
std::string {ptr, end});
- SET_PARAMS_RET_FALSE_IFN((consume_one_bar(ptr, end)),
+ SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)),
"failed to consume one bar after month,
got {}",
std::string {ptr, end});
SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end,
part[3])),
diff --git a/be/test/vec/function/cast/cast_to_date_test.cpp
b/be/test/vec/function/cast/cast_to_date_test.cpp
index adebf811ec5..084b8d45852 100644
--- a/be/test/vec/function/cast/cast_to_date_test.cpp
+++ b/be/test/vec/function/cast/cast_to_date_test.cpp
@@ -77,6 +77,10 @@ TEST_F(FunctionCastTest,
string_to_date_valid_case_strict_mode) {
{{std::string("00-01-01")}, std::string("2000-01-01")},
{{std::string("12010203040506.999")}, std::string("1201-02-03")},
{{std::string("12010203040506.")}, std::string("1201-02-03")},
+
+ {{std::string("2024/05/01")}, std::string("2024-05-01")},
+ {{std::string("2024/05-01T12:30:45")}, std::string("2024-05-01")},
+ {{std::string("2025/06/15T00:00:00.99999999999999")},
std::string("2025-06-15")},
};
check_function_for_cast_strict_mode<DataTypeDateV2>(input_types, data_set);
}
@@ -88,7 +92,6 @@ TEST_F(FunctionCastTest,
string_to_date_invalid_cases_in_strict_mode) {
{{std::string("abc")}, Null()},
{{std::string("2020-05-05 12:30:60")}, Null()},
{{std::string("2023-07-16T19.123+08:00")}, Null()},
- {{std::string("2024/05/01")}, Null()},
{{std::string("24012")}, Null()},
{{std::string("2411 123")}, Null()},
{{std::string("2024-05-01 01:030:02")}, Null()},
@@ -129,9 +132,7 @@ TEST_F(FunctionCastTest,
string_to_date_invalid_cases_in_strict_mode) {
{{std::string("2024-05-01 12:00:00")}, Null()},
{{std::string("2024.05.01")}, Null()},
{{std::string("2024.05.01 12.30.45")}, Null()},
- {{std::string("2024/05-01T12:30:45")}, Null()},
{{std::string("2024-05/01 12.30.45")}, Null()},
- {{std::string("2025/06/15T00:00:00.99999999999999")}, Null()},
{{std::string("-1")}, Null()},
{{std::string("-12")}, Null()},
{{std::string("-1234")}, Null()},
diff --git a/be/test/vec/function/cast/cast_to_datetime_test.cpp
b/be/test/vec/function/cast/cast_to_datetime_test.cpp
index 7df45edc8b6..af27a577b0a 100644
--- a/be/test/vec/function/cast/cast_to_datetime_test.cpp
+++ b/be/test/vec/function/cast/cast_to_datetime_test.cpp
@@ -82,6 +82,11 @@ TEST_F(FunctionCastTest,
string_to_datetime6_valid_case_strict_mode) {
{{std::string("00-01-01")}, std::string("2000-01-01")},
{{std::string("12010203040506.999")}, std::string("1201-02-03
04:05:06.999000")},
{{std::string("12010203040506.")}, std::string("1201-02-03
04:05:06")},
+
+ {{std::string("2024/05/01")}, std::string("2024-05-01")},
+ {{std::string("2024/05-01T12:30:45")}, std::string("2024-05-01
12:30:45")},
+ {{std::string("2025/06/15T00:00:00.99999999999999")},
+ std::string("2025-06-15 00:00:01")},
};
check_function_for_cast_strict_mode<DataTypeDateTimeV2>(input_types,
data_set, "", 6);
}
@@ -93,7 +98,6 @@ TEST_F(FunctionCastTest,
string_to_datetime6_invalid_cases_in_strict_mode) {
{{std::string("abc")}, Null()},
{{std::string("2020-05-05 12:30:60")}, Null()},
{{std::string("2023-07-16T19.123+08:00")}, Null()},
- {{std::string("2024/05/01")}, Null()},
{{std::string("24012")}, Null()},
{{std::string("2411 123")}, Null()},
{{std::string("2024-05-01 01:030:02")}, Null()},
@@ -134,9 +138,7 @@ TEST_F(FunctionCastTest,
string_to_datetime6_invalid_cases_in_strict_mode) {
{{std::string("2024-05-01 12:00:00")}, Null()},
{{std::string("2024.05.01")}, Null()},
{{std::string("2024.05.01 12.30.45")}, Null()},
- {{std::string("2024/05-01T12:30:45")}, Null()},
{{std::string("2024-05/01 12.30.45")}, Null()},
- {{std::string("2025/06/15T00:00:00.99999999999999")}, Null()},
{{std::string("-1")}, Null()},
{{std::string("-12")}, Null()},
{{std::string("-1234")}, Null()},
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
index 3e3639ae964..debd24d2b71 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/StringLikeLiteral.java
@@ -42,8 +42,8 @@ import java.util.regex.Pattern;
public abstract class StringLikeLiteral extends Literal implements
ComparableLiteral {
public static final int CHINESE_CHAR_BYTE_LENGTH = 4;
public static final String toDateStrictRegex
- // <date> ::= (<year> "-" <month1> "-" <day1>) | (<year> <month2>
<day2>)
- =
"((?:(?<year1>\\d{2}|\\d{4})-(?<month1>\\d{1,2})-(?<date1>\\d{1,2})"
+ // <date> ::= (<year> ("-" | "/") <month1> ("-" | "/") <day1>) |
(<year> <month2> <day2>)
+ =
"((?:(?<year1>\\d{2}|\\d{4})[-/](?<month1>\\d{1,2})[-/](?<date1>\\d{1,2})"
+ "|(?<year2>\\d{2}|\\d{4})(?<month2>\\d{2})(?<date2>\\d{2}))"
+ "(?:[T ]"
// <time> ::= <hour1> (":" <minute1> (":" <second1> <fraction>?)?)?
diff --git a/regression-test/data/cast_p0/cast_to_datetime.out
b/regression-test/data/cast_p0/cast_to_datetime.out
index 824d77c6529..8927aa936a1 100644
--- a/regression-test/data/cast_p0/cast_to_datetime.out
+++ b/regression-test/data/cast_p0/cast_to_datetime.out
@@ -608,3 +608,9 @@
-- !sql203 --
\N
+-- !strict_1 --
+2023-06-10T03:55:33
+
+-- !strict_1 --
+2023-06-10
+
diff --git
a/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.out
b/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.out
index e4c09dd6773..176e58405a0 100644
---
a/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.out
+++
b/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.out
@@ -95,6 +95,9 @@
-- !date32 --
2016-01-01
+-- !sql --
+2024-05-01
+
-- !date1 --
2023-07-16
diff --git
a/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.out
b/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.out
index b18c19abdd6..2b492b1581e 100644
---
a/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.out
+++
b/regression-test/data/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.out
@@ -95,6 +95,9 @@
-- !datetime32 --
2016-01-01T00:00
+-- !sql --
+2024-05-01T00:00
+
-- !datetime1 --
2023-07-16T19:20:30
diff --git a/regression-test/suites/cast_p0/cast_to_datetime.groovy
b/regression-test/suites/cast_p0/cast_to_datetime.groovy
index 06e5a476187..aa73837a8a4 100644
--- a/regression-test/suites/cast_p0/cast_to_datetime.groovy
+++ b/regression-test/suites/cast_p0/cast_to_datetime.groovy
@@ -232,4 +232,13 @@ qt_sql200 """ select cast(cast("2020-12-12 00:00:00.123"
as datetime(3)) as date
qt_sql201 """ select cast(cast("2020-12-12 00:00:00.123456" as datetime(6)) as
datetime(3)) """
qt_sql202 """ select cast(cast("2020-12-12 00:00:00.99666" as datetime(6)) as
datetime(2)) """
qt_sql203 """ select cast(cast("9999-12-31 23:59:59.999999" as datetime(6)) as
datetime(5)) """
+
+ sql "set debug_skip_fold_constant = false"
+
+ // for strict mode
+ sql "set enable_strict_cast = true"
+ qt_strict_1 "select cast('2023/6/10 3:55:33' as datetime(6)) "
+ testFoldConst("select cast('2023/6/10 3:55:33' as datetime(6)) ")
+ qt_strict_1 "select cast('2023/6/10 3:55:33' as date) "
+ testFoldConst("select cast('2023/6/10 3:55:33' as date) ")
}
\ No newline at end of file
diff --git
a/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.groovy
b/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.groovy
index ff215cd1dd2..3e4c20a4954 100644
---
a/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.groovy
+++
b/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_date.groovy
@@ -61,10 +61,7 @@ suite("fe_constant_cast_to_date") {
sql """select cast("2023-07-16T19.123+08:00" as date)"""
exception "can't cast to DATETIMEV2"
}
- test {
- sql """select cast("2024/05/01" as date)"""
- exception "can't cast to DATETIMEV2"
- }
+ qt_sql """select cast("2024/05/01" as date)"""
test {
sql """select cast("24012" as date)"""
exception "can't cast to DATETIMEV2"
diff --git
a/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.groovy
b/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.groovy
index 340b5400a4e..22dae2e6384 100644
---
a/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.groovy
+++
b/regression-test/suites/nereids_p0/expression/fold_constant/fe_constant_cast_to_datetime.groovy
@@ -61,10 +61,7 @@ suite("fe_constant_cast_to_datetime") {
sql """select cast("2023-07-16T19.123+08:00" as datetime)"""
exception "can't cast to DATETIMEV2"
}
- test {
- sql """select cast("2024/05/01" as datetime)"""
- exception "can't cast to DATETIMEV2"
- }
+ qt_sql """select cast("2024/05/01" as datetime)"""
test {
sql """select cast("24012" as datetime)"""
exception "can't cast to DATETIMEV2"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]