This is an automated email from the ASF dual-hosted git repository.
zclllyybb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 333e33d6a6b [fix](parse) No longer throws exceptions when parse
datetime failed in `from_olap_string` (#63035)
333e33d6a6b is described below
commit 333e33d6a6bee1460211115f766df173efc75ae3
Author: zclllyybb <[email protected]>
AuthorDate: Fri May 8 15:28:22 2026 +0800
[fix](parse) No longer throws exceptions when parse datetime failed in
`from_olap_string` (#63035)
Problem Summary: SerDe from_olap_string, from_fe_string, and
from_zonemap_string handled invalid scalar strings inconsistently: some
supported types returned errors while others filled default fields. This
change makes supported string deserialization paths fill type defaults
consistently on parse failure.
ATTN: Older versions wrote some values into the zonemap that later
versions consider invalid. These values were never actually used (they
were mostly placeholders, e.g. written during partial updates), so the
only reasonable approach is to ignore them.
---
.../data_type_date_or_datetime_serde.cpp | 11 +++-
.../data_type_serde/data_type_datetimev2_serde.cpp | 8 ++-
.../data_type_serde/data_type_datev2_serde.cpp | 6 ++-
be/test/storage/olap_type_test.cpp | 62 +++++++++++++++++++++-
4 files changed, 83 insertions(+), 4 deletions(-)
diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
index daf3892eae2..151c0ad58d0 100644
--- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
@@ -434,7 +434,16 @@ Status DataTypeDateSerDe<T>::from_olap_string(const
std::string& str, Field& fie
? DatelikeTargetType::DATE_TIME
: DatelikeTargetType::DATE > (StringRef(str), res,
options.timezone, params))
[[unlikely]] {
- return Status::InvalidArgument("parse date or datetime fail, string:
'{}'", str);
+ // In paths like partial update, we may fill default values into
zonemap, while the default values for date-related
+ // types are filled with the default value 0 of the number base,
corresponding to the date 0000-00-00, which is not always valid.
+ // so for the parse path of zonemap strings, we swallow the failure
and return a default value. the value itself does not matter,
+ // after compaction it will be replaced.
+ res = VecDateTimeValue::FIRST_DAY;
+ if constexpr (IsDatetime) {
+ res.to_datetime();
+ } else {
+ res.cast_to_date();
+ }
}
field = Field::create_field<T>(std::move(res));
return Status::OK();
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
index 90a3e8359e6..14df4e7c854 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
@@ -133,7 +133,9 @@ Status DataTypeDateTimeV2SerDe::from_string(StringRef& str,
IColumn& column,
// "2023-10-15 14:30:00.123000" => scale 6, microsecond = 123000
// "2023-10-15 14:30:00.123" => scale 3, microsecond = 123000
//
-// On parse failure, falls back to MIN_DATETIME_V2.
+// On parse failure, falls back to MIN_DATETIME_V2, the packed lower-bound
+// DateTimeV2 value. This is MIN_DATE_V2 shifted into the DateTimeV2 date part,
+// not VecDateTimeValue::FIRST_DAY, which belongs to the V1 representation.
Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str,
Field& field,
const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
@@ -141,6 +143,10 @@ Status DataTypeDateTimeV2SerDe::from_olap_string(const
std::string& str, Field&
DateV2Value<DateTimeV2ValueType> res;
std::string date_format = "%Y-%m-%d %H:%i:%s.%f";
+ // In paths like partial update, we may fill default values into zonemap,
while the default values for date-related
+ // types are filled with the default value 0 of the number base,
corresponding to the date 0000-00-00, which is not always valid.
+ // so for the parse path of zonemap strings, we swallow the failure and
return a default value. the value itself does not matter,
+ // after compaction it will be replaced.
if (!res.from_date_format_str(date_format.data(), date_format.size(),
str.data(), str.size())) {
res = DateV2Value<DateTimeV2ValueType>(MIN_DATETIME_V2);
}
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
index 032a4cf642f..885c41bf204 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
@@ -234,7 +234,7 @@ Status DataTypeDateV2SerDe::from_string_batch(const
ColumnString& col_str, Colum
// uint32_t value = (year << 9) | (month << 5) | day
//
// Expected input format: "YYYY-MM-DD", e.g. "2023-10-15"
-// On parse failure, falls back to MIN_DATE_V2.
+// On parse failure, falls back to MIN_DATE_V2, the packed lower-bound DateV2
value.
Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field&
field,
const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
@@ -243,6 +243,10 @@ Status DataTypeDateV2SerDe::from_olap_string(const
std::string& str, Field& fiel
tm time_tm;
char* tmp = strptime(str.c_str(), "%Y-%m-%d", &time_tm);
+ // In paths like partial update, we may fill default values into zonemap,
while the default values for date-related
+ // types are filled with the default value 0 of the number base,
corresponding to the date 0000-00-00, which is not always valid.
+ // so for the parse path of zonemap strings, we swallow the failure and
return a default value. the value itself does not matter,
+ // after compaction it will be replaced.
if (nullptr != tmp) {
uint32_t value =
((time_tm.tm_year + 1900) << 9) | ((time_tm.tm_mon + 1) << 5)
| time_tm.tm_mday;
diff --git a/be/test/storage/olap_type_test.cpp
b/be/test/storage/olap_type_test.cpp
index 8789c267097..05775b693e4 100644
--- a/be/test/storage/olap_type_test.cpp
+++ b/be/test/storage/olap_type_test.cpp
@@ -47,6 +47,25 @@ public:
}
};
+template <typename CheckField>
+void expect_from_storage_string_paths(const DataTypePtr& data_type, const
std::string& input,
+ CheckField&& check_field) {
+ auto serde = data_type->get_serde();
+ for (int path = 0; path < 3; ++path) {
+ Field field;
+ const char* path_name = path == 0 ? "from_olap_string"
+ : path == 1 ? "from_fe_string"
+ : "from_zonemap_string";
+ auto status =
+ path == 0 ? serde->from_olap_string(input, field,
DataTypeSerDe::FormatOptions())
+ : path == 1 ? serde->from_fe_string(input, field)
+ : serde->from_zonemap_string(input, field);
+ ASSERT_TRUE(status.ok()) << data_type->get_name() << " " << path_name
+ << " failed: " << status.to_string();
+ check_field(field);
+ }
+}
+
// deserialize float string serialized by old version of Doris
TEST_F(OlapTypeTest, deser_float_old) {
std::vector<float> normal_input_values = {
@@ -607,6 +626,47 @@ TEST_F(OlapTypeTest, ser_deser_double) {
}
}
+TEST_F(OlapTypeTest, datelike_storage_string_parse_failure_defaults) {
+ const std::string invalid = "not-a-valid-value";
+
+ VecDateTimeValue datev1_default = VecDateTimeValue::FIRST_DAY;
+ datev1_default.cast_to_date();
+ const auto expected_datev1 =
Field::create_field<TYPE_DATE>(datev1_default);
+
expect_from_storage_string_paths(DataTypeFactory::instance().create_data_type(TYPE_DATE,
false),
+ invalid, [&](const Field& field) {
+ ASSERT_EQ(field.get_type(),
TYPE_DATE);
+ EXPECT_TRUE(field == expected_datev1);
+ });
+
+ VecDateTimeValue datetimev1_default = VecDateTimeValue::FIRST_DAY;
+ datetimev1_default.to_datetime();
+ const auto expected_datetimev1 =
Field::create_field<TYPE_DATETIME>(datetimev1_default);
+ expect_from_storage_string_paths(
+ DataTypeFactory::instance().create_data_type(TYPE_DATETIME,
false), invalid,
+ [&](const Field& field) {
+ ASSERT_EQ(field.get_type(), TYPE_DATETIME);
+ EXPECT_TRUE(field == expected_datetimev1);
+ });
+
+ const auto expected_datev2 =
+
Field::create_field<TYPE_DATEV2>(DateV2Value<DateV2ValueType>(MIN_DATE_V2));
+ expect_from_storage_string_paths(
+ DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false),
invalid,
+ [&](const Field& field) {
+ ASSERT_EQ(field.get_type(), TYPE_DATEV2);
+ EXPECT_TRUE(field == expected_datev2);
+ });
+
+ const auto expected_datetimev2 =
+
Field::create_field<TYPE_DATETIMEV2>(DateV2Value<DateTimeV2ValueType>(MIN_DATETIME_V2));
+ expect_from_storage_string_paths(
+ DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2,
false, 0, 6), invalid,
+ [&](const Field& field) {
+ ASSERT_EQ(field.get_type(), TYPE_DATETIMEV2);
+ EXPECT_TRUE(field == expected_datetimev2);
+ });
+}
+
//
=============================================================================
// Tests for to_olap_string / from_zonemap_string on DataTypeSerDe
//
@@ -2023,4 +2083,4 @@ TEST_F(OlapTypeTest, timestamptz_type) {
<< "serde mismatch for TIMESTAMPTZ expected=" << tc.expected;
}
}
-} // namespace doris
\ No newline at end of file
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]