This is an automated email from the ASF dual-hosted git repository.

zclllyybb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 333e33d6a6b [fix](parse) No longer throws exceptions when parse 
datetime failed in `from_olap_string` (#63035)
333e33d6a6b is described below

commit 333e33d6a6bee1460211115f766df173efc75ae3
Author: zclllyybb <[email protected]>
AuthorDate: Fri May 8 15:28:22 2026 +0800

    [fix](parse) No longer throws exceptions when parse datetime failed in 
`from_olap_string` (#63035)
    
    Problem Summary: SerDe from_olap_string, from_fe_string, and
    from_zonemap_string handled invalid scalar strings inconsistently: some
    supported types returned errors while others filled default fields. This
    change makes supported string deserialization paths fill type defaults
    consistently on parse failure.
    
    ATTN: Older versions could write values into the zonemap that later
    versions consider invalid. These values were never actually used (they
    are mostly placeholders written by paths such as partial update), so
    the only reasonable approach is to ignore them.
---
 .../data_type_date_or_datetime_serde.cpp           | 11 +++-
 .../data_type_serde/data_type_datetimev2_serde.cpp |  8 ++-
 .../data_type_serde/data_type_datev2_serde.cpp     |  6 ++-
 be/test/storage/olap_type_test.cpp                 | 62 +++++++++++++++++++++-
 4 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp 
b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
index daf3892eae2..151c0ad58d0 100644
--- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
@@ -434,7 +434,16 @@ Status DataTypeDateSerDe<T>::from_olap_string(const 
std::string& str, Field& fie
                 ? DatelikeTargetType::DATE_TIME
                 : DatelikeTargetType::DATE > (StringRef(str), res, 
options.timezone, params))
             [[unlikely]] {
-        return Status::InvalidArgument("parse date or datetime fail, string: 
'{}'", str);
+        // In paths like partial update, we may fill default values into 
zonemap, while the default values for date-related
+        // types are filled with the default value 0 of the number base, 
corresponding to the date 0000-00-00, which is not always valid.
+        // so for the parse path of zonemap strings, we swallow the failure 
and return a default value. the value itself does not matter,
+        // after compaction it will be replaced.
+        res = VecDateTimeValue::FIRST_DAY;
+        if constexpr (IsDatetime) {
+            res.to_datetime();
+        } else {
+            res.cast_to_date();
+        }
     }
     field = Field::create_field<T>(std::move(res));
     return Status::OK();
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp 
b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
index 90a3e8359e6..14df4e7c854 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
@@ -133,7 +133,9 @@ Status DataTypeDateTimeV2SerDe::from_string(StringRef& str, 
IColumn& column,
 //   "2023-10-15 14:30:00.123000"  => scale 6, microsecond = 123000
 //   "2023-10-15 14:30:00.123"     => scale 3, microsecond = 123000
 //
-// On parse failure, falls back to MIN_DATETIME_V2.
+// On parse failure, falls back to MIN_DATETIME_V2, the packed lower-bound
+// DateTimeV2 value. This is MIN_DATE_V2 shifted into the DateTimeV2 date part,
+// not VecDateTimeValue::FIRST_DAY, which belongs to the V1 representation.
 Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str, 
Field& field,
                                                  const FormatOptions& options) 
const {
     CastParameters params {.status = Status::OK(), .is_strict = false};
@@ -141,6 +143,10 @@ Status DataTypeDateTimeV2SerDe::from_olap_string(const 
std::string& str, Field&
     DateV2Value<DateTimeV2ValueType> res;
     std::string date_format = "%Y-%m-%d %H:%i:%s.%f";
 
+    // In paths like partial update, we may fill default values into zonemap, 
while the default values for date-related
+    // types are filled with the default value 0 of the number base, 
corresponding to the date 0000-00-00, which is not always valid.
+    // so for the parse path of zonemap strings, we swallow the failure and 
return a default value. the value itself does not matter,
+    // after compaction it will be replaced.
     if (!res.from_date_format_str(date_format.data(), date_format.size(), 
str.data(), str.size())) {
         res = DateV2Value<DateTimeV2ValueType>(MIN_DATETIME_V2);
     }
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp 
b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
index 032a4cf642f..885c41bf204 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
@@ -234,7 +234,7 @@ Status DataTypeDateV2SerDe::from_string_batch(const 
ColumnString& col_str, Colum
 //   uint32_t value = (year << 9) | (month << 5) | day
 //
 // Expected input format: "YYYY-MM-DD", e.g. "2023-10-15"
-// On parse failure, falls back to MIN_DATE_V2.
+// On parse failure, falls back to MIN_DATE_V2, the packed lower-bound DateV2 
value.
 Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field& 
field,
                                              const FormatOptions& options) 
const {
     CastParameters params {.status = Status::OK(), .is_strict = false};
@@ -243,6 +243,10 @@ Status DataTypeDateV2SerDe::from_olap_string(const 
std::string& str, Field& fiel
     tm time_tm;
     char* tmp = strptime(str.c_str(), "%Y-%m-%d", &time_tm);
 
+    // In paths like partial update, we may fill default values into zonemap, 
while the default values for date-related
+    // types are filled with the default value 0 of the number base, 
corresponding to the date 0000-00-00, which is not always valid.
+    // so for the parse path of zonemap strings, we swallow the failure and 
return a default value. the value itself does not matter,
+    // after compaction it will be replaced.
     if (nullptr != tmp) {
         uint32_t value =
                 ((time_tm.tm_year + 1900) << 9) | ((time_tm.tm_mon + 1) << 5) 
| time_tm.tm_mday;
diff --git a/be/test/storage/olap_type_test.cpp 
b/be/test/storage/olap_type_test.cpp
index 8789c267097..05775b693e4 100644
--- a/be/test/storage/olap_type_test.cpp
+++ b/be/test/storage/olap_type_test.cpp
@@ -47,6 +47,25 @@ public:
     }
 };
 
+template <typename CheckField>
+void expect_from_storage_string_paths(const DataTypePtr& data_type, const 
std::string& input,
+                                      CheckField&& check_field) {
+    auto serde = data_type->get_serde();
+    for (int path = 0; path < 3; ++path) {
+        Field field;
+        const char* path_name = path == 0   ? "from_olap_string"
+                                : path == 1 ? "from_fe_string"
+                                            : "from_zonemap_string";
+        auto status =
+                path == 0   ? serde->from_olap_string(input, field, 
DataTypeSerDe::FormatOptions())
+                : path == 1 ? serde->from_fe_string(input, field)
+                            : serde->from_zonemap_string(input, field);
+        ASSERT_TRUE(status.ok()) << data_type->get_name() << " " << path_name
+                                 << " failed: " << status.to_string();
+        check_field(field);
+    }
+}
+
 // deserialize float string serialized by old version of Doris
 TEST_F(OlapTypeTest, deser_float_old) {
     std::vector<float> normal_input_values = {
@@ -607,6 +626,47 @@ TEST_F(OlapTypeTest, ser_deser_double) {
     }
 }
 
+TEST_F(OlapTypeTest, datelike_storage_string_parse_failure_defaults) {
+    const std::string invalid = "not-a-valid-value";
+
+    VecDateTimeValue datev1_default = VecDateTimeValue::FIRST_DAY;
+    datev1_default.cast_to_date();
+    const auto expected_datev1 = 
Field::create_field<TYPE_DATE>(datev1_default);
+    
expect_from_storage_string_paths(DataTypeFactory::instance().create_data_type(TYPE_DATE,
 false),
+                                     invalid, [&](const Field& field) {
+                                         ASSERT_EQ(field.get_type(), 
TYPE_DATE);
+                                         EXPECT_TRUE(field == expected_datev1);
+                                     });
+
+    VecDateTimeValue datetimev1_default = VecDateTimeValue::FIRST_DAY;
+    datetimev1_default.to_datetime();
+    const auto expected_datetimev1 = 
Field::create_field<TYPE_DATETIME>(datetimev1_default);
+    expect_from_storage_string_paths(
+            DataTypeFactory::instance().create_data_type(TYPE_DATETIME, 
false), invalid,
+            [&](const Field& field) {
+                ASSERT_EQ(field.get_type(), TYPE_DATETIME);
+                EXPECT_TRUE(field == expected_datetimev1);
+            });
+
+    const auto expected_datev2 =
+            
Field::create_field<TYPE_DATEV2>(DateV2Value<DateV2ValueType>(MIN_DATE_V2));
+    expect_from_storage_string_paths(
+            DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false), 
invalid,
+            [&](const Field& field) {
+                ASSERT_EQ(field.get_type(), TYPE_DATEV2);
+                EXPECT_TRUE(field == expected_datev2);
+            });
+
+    const auto expected_datetimev2 =
+            
Field::create_field<TYPE_DATETIMEV2>(DateV2Value<DateTimeV2ValueType>(MIN_DATETIME_V2));
+    expect_from_storage_string_paths(
+            DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2, 
false, 0, 6), invalid,
+            [&](const Field& field) {
+                ASSERT_EQ(field.get_type(), TYPE_DATETIMEV2);
+                EXPECT_TRUE(field == expected_datetimev2);
+            });
+}
+
 // 
=============================================================================
 // Tests for to_olap_string / from_zonemap_string on DataTypeSerDe
 //
@@ -2023,4 +2083,4 @@ TEST_F(OlapTypeTest, timestamptz_type) {
                 << "serde mismatch for TIMESTAMPTZ expected=" << tc.expected;
     }
 }
-} // namespace doris
\ No newline at end of file
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to