This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit a6f7747d29a1acfecc8efc7ca0a11a8b57aeb2f0 Author: Kaijie Chen <[email protected]> AuthorDate: Wed May 22 21:59:27 2024 +0800 [feature](datatype) add BE config to allow zero date (#34961) Co-authored-by: Gabriel <[email protected]> --- be/src/common/config.cpp | 1 + be/src/common/config.h | 2 ++ be/src/vec/io/io_helper.h | 10 ++++++---- be/src/vec/runtime/vdatetime_value.cpp | 19 +++++++++++++------ be/src/vec/runtime/vdatetime_value.h | 6 +++--- be/test/vec/exprs/vexpr_test.cpp | 24 ++++++++++++++++++++++++ 6 files changed, 49 insertions(+), 13 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 11600c4b5e9..546d69a57b4 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1061,6 +1061,7 @@ DEFINE_mBool(enable_delete_when_cumu_compaction, "false"); // max_write_buffer_number for rocksdb DEFINE_Int32(rocksdb_max_write_buffer_number, "5"); +DEFINE_mBool(allow_zero_date, "false"); DEFINE_Bool(allow_invalid_decimalv2_literal, "false"); DEFINE_mString(kerberos_ccache_path, ""); DEFINE_mString(kerberos_krb5_conf_path, "/etc/krb5.conf"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 15450a4b716..54f8d3459dd 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1102,6 +1102,8 @@ DECLARE_mBool(enable_delete_when_cumu_compaction); // max_write_buffer_number for rocksdb DECLARE_Int32(rocksdb_max_write_buffer_number); +// Convert date 0000-00-00 to 0000-01-01. It's recommended to set to false. +DECLARE_mBool(allow_zero_date); // Allow invalid decimalv2 literal for compatible with old version. Recommend set it false strongly. DECLARE_mBool(allow_invalid_decimalv2_literal); // Allow to specify kerberos credentials cache path. diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h index 1f7a8ffdf18..d5ca522146a 100644 --- a/be/src/vec/io/io_helper.h +++ b/be/src/vec/io/io_helper.h @@ -343,7 +343,7 @@ template <typename T> bool read_date_v2_text_impl(T& x, ReadBuffer& buf) { static_assert(std::is_same_v<UInt32, T>); auto dv = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(x); - auto ans = dv.from_date_str(buf.position(), buf.count()); + auto ans = dv.from_date_str(buf.position(), buf.count(), config::allow_zero_date); // only to match the is_all_read() check to prevent return null buf.position() = buf.end(); @@ -355,7 +355,8 @@ template <typename T> bool read_date_v2_text_impl(T& x, ReadBuffer& buf, const cctz::time_zone& local_time_zone) { static_assert(std::is_same_v<UInt32, T>); auto dv = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(x); - auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone); + auto ans = + dv.from_date_str(buf.position(), buf.count(), local_time_zone, config::allow_zero_date); // only to match the is_all_read() check to prevent return null buf.position() = buf.end(); @@ -367,7 +368,7 @@ template <typename T> bool read_datetime_v2_text_impl(T& x, ReadBuffer& buf, UInt32 scale = -1) { static_assert(std::is_same_v<UInt64, T>); auto dv = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(x); - auto ans = dv.from_date_str(buf.position(), buf.count(), scale); + auto ans = dv.from_date_str(buf.position(), buf.count(), scale, config::allow_zero_date); // only to match the is_all_read() check to prevent return null buf.position() = buf.end(); @@ -380,7 +381,8 @@ bool read_datetime_v2_text_impl(T& x, ReadBuffer& buf, const cctz::time_zone& lo UInt32 scale = -1) { static_assert(std::is_same_v<UInt64, T>); auto dv = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(x); - auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone, scale); + auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone, scale, + config::allow_zero_date); // only to match the is_all_read() check to prevent return null buf.position() = buf.end(); diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index dd0ce341493..5c6e788695c 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -1974,18 +1974,20 @@ void DateV2Value<T>::format_datetime(uint32_t* date_val, bool* carry_bits) const // YYYY-MM-DD HH-MM-DD.FFFFFF AM in default format // 0 1 2 3 4 5 6 7 template <typename T> -bool DateV2Value<T>::from_date_str(const char* date_str, int len, int scale /* = -1*/) { - return from_date_str_base(date_str, len, scale, nullptr); +bool DateV2Value<T>::from_date_str(const char* date_str, int len, int scale /* = -1*/, + bool convert_zero) { + return from_date_str_base(date_str, len, scale, nullptr, convert_zero); } // when we parse template <typename T> bool DateV2Value<T>::from_date_str(const char* date_str, int len, - const cctz::time_zone& local_time_zone, int scale /* = -1*/) { - return from_date_str_base(date_str, len, scale, &local_time_zone); + const cctz::time_zone& local_time_zone, int scale /* = -1*/, + bool convert_zero) { + return from_date_str_base(date_str, len, scale, &local_time_zone, convert_zero); } template <typename T> bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale, - const cctz::time_zone* local_time_zone) { + const cctz::time_zone* local_time_zone, bool convert_zero) { const char* ptr = date_str; const char* end = date_str + len; // ONLY 2, 6 can follow by a space @@ -2187,7 +2189,12 @@ bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale return false; } if (is_invalid(date_val[0], date_val[1], date_val[2], 0, 0, 0, 0)) { - return false; + if (date_val[0] == 0 && date_val[1] == 0 && date_val[2] == 0 && convert_zero) { + date_val[1] = 1; + date_val[2] = 1; + } else { + return false; + } } // In check_range_and_set_time, for Date type the time part will be truncated. So if the timezone offset should make diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 8fcb45a6440..dfc3ab332a7 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -847,9 +847,9 @@ public: // 'YYMMDD', 'YYYYMMDD', 'YYMMDDHHMMSS', 'YYYYMMDDHHMMSS' // 'YY-MM-DD', 'YYYY-MM-DD', 'YY-MM-DD HH.MM.SS' // 'YYYYMMDDTHHMMSS' - bool from_date_str(const char* str, int len, int scale = -1); + bool from_date_str(const char* str, int len, int scale = -1, bool convert_zero = false); bool from_date_str(const char* str, int len, const cctz::time_zone& local_time_zone, - int scale = -1); + int scale = -1, bool convert_zero = false); // Convert this value to string // this will check type to decide which format to convert @@ -1245,7 +1245,7 @@ private: bool disable_lut = false); bool from_date_str_base(const char* date_str, int len, int scale, - const cctz::time_zone* local_time_zone); + const cctz::time_zone* local_time_zone, bool convert_zero); // Used to construct from int value int64_t standardize_timevalue(int64_t value); diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index 9649f2cfe69..bc18fa30f4b 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -519,6 +519,30 @@ TEST(TEST_VEXPR, LITERALTEST) { EXPECT_EQ(v, dt); EXPECT_EQ("2021-04-07", literal.value()); } + { + DateV2Value<DateV2ValueType> data_time_value; + const char* date = "00000000"; + EXPECT_EQ(data_time_value.from_date_str(date, strlen(date), -1, true), true); + + DateV2Value<DateV2ValueType> data_time_value1; + const char* date1 = "00000101"; + EXPECT_EQ(data_time_value1.from_date_str(date1, strlen(date1), -1, true), true); + EXPECT_EQ(data_time_value.to_int64(), data_time_value1.to_int64()); + + EXPECT_EQ(data_time_value.from_date_str(date, strlen(date)), false); + } + { + DateV2Value<DateTimeV2ValueType> data_time_value; + const char* date = "00000000111111"; + EXPECT_EQ(data_time_value.from_date_str(date, strlen(date), -1, true), true); + + DateV2Value<DateTimeV2ValueType> data_time_value1; + const char* date1 = "00000101111111"; + EXPECT_EQ(data_time_value1.from_date_str(date1, strlen(date1), -1, true), true); + EXPECT_EQ(data_time_value.to_int64(), data_time_value1.to_int64()); + + EXPECT_EQ(data_time_value.from_date_str(date, strlen(date)), false); + } // jsonb { std::string j = R"([null,true,false,100,6.18,"abc"])"; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
