This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 7c0b113aa83 [feature](datatype) add BE config to allow zero date (#34961) (#37214)
7c0b113aa83 is described below

commit 7c0b113aa83b0a2da29735778bcb3ecb155d5204
Author: Kaijie Chen <[email protected]>
AuthorDate: Wed Jul 3 19:05:01 2024 +0800

    [feature](datatype) add BE config to allow zero date (#34961) (#37214)
    
    cherry-pick #34961
    
    Co-authored-by: Gabriel <[email protected]>
---
 be/src/common/config.cpp               |  1 +
 be/src/common/config.h                 |  2 ++
 be/src/vec/io/io_helper.h              | 10 ++++++----
 be/src/vec/runtime/vdatetime_value.cpp | 19 +++++++++++++------
 be/src/vec/runtime/vdatetime_value.h   |  6 +++---
 be/test/vec/exprs/vexpr_test.cpp       | 24 ++++++++++++++++++++++++
 6 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 9e8f226dc0d..5f24168629e 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1074,6 +1074,7 @@ DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
 // max_write_buffer_number for rocksdb
 DEFINE_Int32(rocksdb_max_write_buffer_number, "5");
 
+DEFINE_mBool(allow_zero_date, "false");
 DEFINE_Bool(allow_invalid_decimalv2_literal, "false");
 DEFINE_mString(kerberos_ccache_path, "");
 DEFINE_mString(kerberos_krb5_conf_path, "/etc/krb5.conf");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 35e3a620288..8336515af3e 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1111,6 +1111,8 @@ DECLARE_mBool(enable_delete_when_cumu_compaction);
 // max_write_buffer_number for rocksdb
 DECLARE_Int32(rocksdb_max_write_buffer_number);
 
+// Convert date 0000-00-00 to 0000-01-01. It's recommended to set to false.
+DECLARE_mBool(allow_zero_date);
 // Allow invalid decimalv2 literal for compatible with old version. Recommend set it false strongly.
 DECLARE_mBool(allow_invalid_decimalv2_literal);
 // Allow to specify kerberos credentials cache path.
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index 3aa816ecd56..85a2de7404f 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -323,7 +323,7 @@ template <typename T>
 bool read_date_v2_text_impl(T& x, ReadBuffer& buf) {
     static_assert(std::is_same_v<UInt32, T>);
     auto dv = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(x);
-    auto ans = dv.from_date_str(buf.position(), buf.count());
+    auto ans = dv.from_date_str(buf.position(), buf.count(), config::allow_zero_date);
 
     // only to match the is_all_read() check to prevent return null
     buf.position() = buf.end();
@@ -335,7 +335,8 @@ template <typename T>
 bool read_date_v2_text_impl(T& x, ReadBuffer& buf, const cctz::time_zone& local_time_zone) {
     static_assert(std::is_same_v<UInt32, T>);
     auto dv = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(x);
-    auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone);
+    auto ans =
+            dv.from_date_str(buf.position(), buf.count(), local_time_zone, config::allow_zero_date);
 
     // only to match the is_all_read() check to prevent return null
     buf.position() = buf.end();
@@ -347,7 +348,7 @@ template <typename T>
 bool read_datetime_v2_text_impl(T& x, ReadBuffer& buf, UInt32 scale = -1) {
     static_assert(std::is_same_v<UInt64, T>);
     auto dv = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(x);
-    auto ans = dv.from_date_str(buf.position(), buf.count(), scale);
+    auto ans = dv.from_date_str(buf.position(), buf.count(), scale, config::allow_zero_date);
 
     // only to match the is_all_read() check to prevent return null
     buf.position() = buf.end();
@@ -360,7 +361,8 @@ bool read_datetime_v2_text_impl(T& x, ReadBuffer& buf, const cctz::time_zone& lo
                                 UInt32 scale = -1) {
     static_assert(std::is_same_v<UInt64, T>);
     auto dv = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(x);
-    auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone, scale);
+    auto ans = dv.from_date_str(buf.position(), buf.count(), local_time_zone, scale,
+                                config::allow_zero_date);
 
     // only to match the is_all_read() check to prevent return null
     buf.position() = buf.end();
diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp
index fa82f05c64f..a9a6d148252 100644
--- a/be/src/vec/runtime/vdatetime_value.cpp
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -1970,18 +1970,20 @@ void DateV2Value<T>::format_datetime(uint32_t* date_val, bool* carry_bits) const
 // YYYY-MM-DD HH-MM-DD.FFFFFF AM in default format
 // 0    1  2  3  4  5  6      7
 template <typename T>
-bool DateV2Value<T>::from_date_str(const char* date_str, int len, int scale /* = -1*/) {
-    return from_date_str_base(date_str, len, scale, nullptr);
+bool DateV2Value<T>::from_date_str(const char* date_str, int len, int scale /* = -1*/,
+                                   bool convert_zero) {
+    return from_date_str_base(date_str, len, scale, nullptr, convert_zero);
 }
 // when we parse
 template <typename T>
 bool DateV2Value<T>::from_date_str(const char* date_str, int len,
-                                   const cctz::time_zone& local_time_zone, int scale /* = -1*/) {
-    return from_date_str_base(date_str, len, scale, &local_time_zone);
+                                   const cctz::time_zone& local_time_zone, int scale /* = -1*/,
+                                   bool convert_zero) {
+    return from_date_str_base(date_str, len, scale, &local_time_zone, convert_zero);
 }
 template <typename T>
 bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale,
-                                        const cctz::time_zone* local_time_zone) {
+                                        const cctz::time_zone* local_time_zone, bool convert_zero) {
     const char* ptr = date_str;
     const char* end = date_str + len;
     // ONLY 2, 6 can follow by a space
@@ -2183,7 +2185,12 @@ bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale
         return false;
     }
     if (is_invalid(date_val[0], date_val[1], date_val[2], 0, 0, 0, 0)) {
-        return false;
+        if (date_val[0] == 0 && date_val[1] == 0 && date_val[2] == 0 && convert_zero) {
+            date_val[1] = 1;
+            date_val[2] = 1;
+        } else {
+            return false;
+        }
     }
 
     // In check_range_and_set_time, for Date type the time part will be truncated. So if the timezone offset should make
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
index ec4e8e11848..14967ae8820 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -841,9 +841,9 @@ public:
     // 'YYMMDD', 'YYYYMMDD', 'YYMMDDHHMMSS', 'YYYYMMDDHHMMSS'
     // 'YY-MM-DD', 'YYYY-MM-DD', 'YY-MM-DD HH.MM.SS'
     // 'YYYYMMDDTHHMMSS'
-    bool from_date_str(const char* str, int len, int scale = -1);
+    bool from_date_str(const char* str, int len, int scale = -1, bool convert_zero = false);
     bool from_date_str(const char* str, int len, const cctz::time_zone& local_time_zone,
-                       int scale = -1);
+                       int scale = -1, bool convert_zero = false);
 
     // Convert this value to string
     // this will check type to decide which format to convert
@@ -1238,7 +1238,7 @@ private:
                              bool disable_lut = false);
 
     bool from_date_str_base(const char* date_str, int len, int scale,
-                            const cctz::time_zone* local_time_zone);
+                            const cctz::time_zone* local_time_zone, bool convert_zero);
 
     // Used to construct from int value
     int64_t standardize_timevalue(int64_t value);
diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp
index 3fe5c60f6b1..bd7981a51cb 100644
--- a/be/test/vec/exprs/vexpr_test.cpp
+++ b/be/test/vec/exprs/vexpr_test.cpp
@@ -523,6 +523,30 @@ TEST(TEST_VEXPR, LITERALTEST) {
         EXPECT_EQ(v, dt);
         EXPECT_EQ("2021-04-07", literal.value());
     }
+    {
+        DateV2Value<DateV2ValueType> data_time_value;
+        const char* date = "00000000";
+        EXPECT_EQ(data_time_value.from_date_str(date, strlen(date), -1, true), true);
+
+        DateV2Value<DateV2ValueType> data_time_value1;
+        const char* date1 = "00000101";
+        EXPECT_EQ(data_time_value1.from_date_str(date1, strlen(date1), -1, true), true);
+        EXPECT_EQ(data_time_value.to_int64(), data_time_value1.to_int64());
+
+        EXPECT_EQ(data_time_value.from_date_str(date, strlen(date)), false);
+    }
+    {
+        DateV2Value<DateTimeV2ValueType> data_time_value;
+        const char* date = "00000000111111";
+        EXPECT_EQ(data_time_value.from_date_str(date, strlen(date), -1, true), true);
+
+        DateV2Value<DateTimeV2ValueType> data_time_value1;
+        const char* date1 = "00000101111111";
+        EXPECT_EQ(data_time_value1.from_date_str(date1, strlen(date1), -1, true), true);
+        EXPECT_EQ(data_time_value.to_int64(), data_time_value1.to_int64());
+
+        EXPECT_EQ(data_time_value.from_date_str(date, strlen(date)), false);
+    }
     // jsonb
     {
         std::string j = R"([null,true,false,100,6.18,"abc"])";


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to