This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new a7d727b15c4 branch-4.0: [Fix](function) avoid false alarm of some
datelike functions #59897 (#59931)
a7d727b15c4 is described below
commit a7d727b15c4c50db4e31dd8f3729293fb81773b6
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 16 10:39:34 2026 +0800
branch-4.0: [Fix](function) avoid false alarm of some datelike functions
#59897 (#59931)
Cherry-picked from #59897
Co-authored-by: zclllyybb <[email protected]>
---
.../function_date_or_datetime_computation.h | 32 +++-
.../vec/functions/function_other_types_to_date.cpp | 165 ++++++++++++++-------
.../data/correctness_p0/test_date_trunc_error.out | 6 +
.../correctness_p0/test_date_trunc_error.groovy | 85 +++++++++++
4 files changed, 227 insertions(+), 61 deletions(-)
diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h
b/be/src/vec/functions/function_date_or_datetime_computation.h
index c6b3b5a9899..ec1c6ff36b3 100644
--- a/be/src/vec/functions/function_date_or_datetime_computation.h
+++ b/be/src/vec/functions/function_date_or_datetime_computation.h
@@ -1127,16 +1127,34 @@ struct TimestampToDateTime : IFunction {
static FunctionPtr create() { return
std::make_shared<TimestampToDateTime<Impl>>(); }
+ // Handle nulls manually to prevent invalid default values from causing
errors
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const
override {
- const auto& arg_col = block.get_by_position(arguments[0]).column;
- const auto& column_data = assert_cast<const ColumnInt64&>(*arg_col);
+ // Handle null map manually
+ auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
+ NullMap& result_null_map =
assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();
+
+ ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
+ const NullMap* null_map =
VectorizedUtils::get_null_map(argument_column);
+ if (null_map) {
+ VectorizedUtils::update_null_map(result_null_map, *null_map);
+ }
+
+ // Extract nested column
+ argument_column = remove_nullable(argument_column);
+
+ const auto& column_data = assert_cast<const
ColumnInt64&>(*argument_column);
auto res_col = ColumnDateTimeV2::create();
res_col->get_data().resize_fill(input_rows_count, 0);
auto& res_data = res_col->get_data();
const cctz::time_zone& time_zone = context->state()->timezone_obj();
- for (int i = 0; i < input_rows_count; ++i) {
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (result_null_map[i]) {
+ continue;
+ }
Int64 value = column_data.get_element(i);
if (value < 0) [[unlikely]] {
throw_out_of_bound_int(name, value);
@@ -1151,7 +1169,13 @@ struct TimestampToDateTime : IFunction {
dt.set_microsecond((value % Impl::ratio) * ratio_to_micro);
}
- block.replace_by_position(result, std::move(res_col));
+ if (null_map) {
+ block.replace_by_position(
+ result,
+ ColumnNullable::create(std::move(res_col),
std::move(result_null_map_column)));
+ } else {
+ block.replace_by_position(result, std::move(res_col));
+ }
return Status::OK();
}
};
diff --git a/be/src/vec/functions/function_other_types_to_date.cpp
b/be/src/vec/functions/function_other_types_to_date.cpp
index ec6e506d091..347612b1350 100644
--- a/be/src/vec/functions/function_other_types_to_date.cpp
+++ b/be/src/vec/functions/function_other_types_to_date.cpp
@@ -545,9 +545,9 @@ private:
auto& res =
static_cast<ColumnType*>(result_column->assume_mutable().get())->get_data();
for (size_t i = 0; i < input_rows_count; ++i) {
auto dt = binary_cast<NativeType, DateValueType>(data[i]);
- if (!dt.template datetime_trunc<Unit>()) {
- throw_out_of_bound_one_date<DateValueType>(name, data[i]);
- }
+ // datetime_trunc only raise only when dt invalid which is
impossible. so we dont throw error better.
+ // then we can use default implementation for nulls with no worry
of invalid nested value.
+ dt.template datetime_trunc<Unit>();
res[i] = binary_cast<DateValueType, NativeType>(dt);
}
}
@@ -735,9 +735,7 @@ struct UnixTimeStampDateImpl {
const auto& ts_value =
reinterpret_cast<const
DateV2Value<DateV2ValueType>&>(*source.data);
int64_t timestamp {};
- const auto valid =
ts_value.unix_timestamp(&timestamp,
context->state()->timezone_obj());
- DCHECK(valid);
+ ts_value.unix_timestamp(&timestamp,
context->state()->timezone_obj());
col_result_data[i] = trim_timestamp(timestamp, NewVersion);
}
block.replace_by_position(result, std::move(col_result));
@@ -753,9 +751,7 @@ struct UnixTimeStampDateImpl {
const auto& ts_value =
reinterpret_cast<const
DateV2Value<DateTimeV2ValueType>&>(*source.data);
std::pair<int64_t, int64_t> timestamp {};
- const auto valid =
ts_value.unix_timestamp(&timestamp,
context->state()->timezone_obj());
- DCHECK(valid);
+ ts_value.unix_timestamp(&timestamp,
context->state()->timezone_obj());
auto [sec, ms] = trim_timestamp(timestamp, NewVersion);
col_result_data[i] =
@@ -770,12 +766,7 @@ struct UnixTimeStampDateImpl {
}
};
-template <typename DateType, bool NewVersion = false>
-struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl<DateType,
NewVersion> {
- static DataTypes get_variadic_argument_types() { return
{std::make_shared<DateType>()}; }
-};
-
-// This impl doesn't use default impl to deal null value.
+// Handle nulls manually to prevent invalid default values from causing errors
template <bool NewVersion = false>
struct UnixTimeStampStrImpl {
static DataTypes get_variadic_argument_types() {
@@ -789,9 +780,15 @@ struct UnixTimeStampStrImpl {
return std::make_shared<DataTypeDecimal64>(16, 6);
}
+ static bool use_default_implementation_for_nulls() { return false; }
+
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
+ // Handle null map manually
+ auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
+ NullMap& result_null_map =
assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();
+
ColumnPtr col_left = nullptr, col_right = nullptr;
bool source_const = false, format_const = false;
std::tie(col_left, source_const) =
@@ -799,12 +796,31 @@ struct UnixTimeStampStrImpl {
std::tie(col_right, format_const) =
unpack_if_const(block.get_by_position(arguments[1]).column);
+ // Update result null map from input null maps
+ const NullMap* null_map_left =
+
VectorizedUtils::get_null_map(block.get_by_position(arguments[0]).column);
+ const NullMap* null_map_right =
+
VectorizedUtils::get_null_map(block.get_by_position(arguments[1]).column);
+ if (null_map_left) {
+ VectorizedUtils::update_null_map(result_null_map, *null_map_left,
source_const);
+ }
+ if (null_map_right) {
+ VectorizedUtils::update_null_map(result_null_map, *null_map_right,
format_const);
+ }
+
+ // Extract nested columns
+ col_left = remove_nullable(col_left);
+ col_right = remove_nullable(col_right);
+
auto col_result = ColumnDecimal64::create(input_rows_count, 6);
auto& col_result_data = col_result->get_data();
const auto* col_source = assert_cast<const
ColumnString*>(col_left.get());
const auto* col_format = assert_cast<const
ColumnString*>(col_right.get());
- for (int i = 0; i < input_rows_count; i++) {
+ for (size_t i = 0; i < input_rows_count; i++) {
+ if (result_null_map[i]) {
+ continue;
+ }
StringRef source = col_source->get_data_at(index_check_const(i,
source_const));
StringRef fmt = col_format->get_data_at(index_check_const(i,
format_const));
@@ -829,7 +845,13 @@ struct UnixTimeStampStrImpl {
}
}
- block.replace_by_position(result, std::move(col_result));
+ if (null_map_left || null_map_right) {
+ block.replace_by_position(result,
+
ColumnNullable::create(std::move(col_result),
+
std::move(result_null_map_column)));
+ } else {
+ block.replace_by_position(result, std::move(col_result));
+ }
return Status::OK();
}
@@ -855,6 +877,13 @@ public:
return Impl::get_variadic_argument_types();
}
+ bool use_default_implementation_for_nulls() const override {
+ if constexpr (requires { Impl::use_default_implementation_for_nulls();
}) {
+ return Impl::use_default_implementation_for_nulls();
+ }
+ return true;
+ }
+
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const
override {
return Impl::execute_impl(context, block, arguments, result,
input_rows_count);
@@ -881,6 +910,13 @@ public:
return Impl::get_variadic_argument_types();
}
+ bool use_default_implementation_for_nulls() const override {
+ if constexpr (requires { Impl::use_default_implementation_for_nulls();
}) {
+ return Impl::use_default_implementation_for_nulls();
+ }
+ return true;
+ }
+
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const
override {
return Impl::execute_impl(context, block, arguments, result,
input_rows_count);
@@ -966,11 +1002,27 @@ public:
return {std::make_shared<typename
PrimitiveTypeTraits<PType>::DataType>()};
}
- //ATTN: no need to replace null value now because last_day and to_monday
both process boundary case well.
- // may need to change if support more functions
+ // Handle nulls manually to prevent invalid default values from causing
errors
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const
override {
- return Impl<PType>::execute_impl(context, block, arguments, result,
input_rows_count);
+ // Handle null map manually - update result null map from input null
maps upfront
+ auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
+ NullMap& result_null_map =
assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();
+
+ ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
+ const NullMap* null_map =
VectorizedUtils::get_null_map(argument_column);
+ if (null_map) {
+ VectorizedUtils::update_null_map(result_null_map, *null_map);
+ }
+
+ // Extract nested column
+ argument_column = remove_nullable(argument_column);
+
+ return Impl<PType>::execute_impl(context, block, arguments, result,
input_rows_count,
+ argument_column, result_null_map,
+ std::move(result_null_map_column));
}
};
@@ -988,26 +1040,22 @@ struct LastDayImpl {
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
- size_t input_rows_count) {
+ size_t input_rows_count, const ColumnPtr&
argument_column,
+ NullMap& result_null_map,
+ ColumnUInt8::MutablePtr result_null_map_column)
{
const auto is_nullable =
block.get_by_position(result).type->is_nullable();
- ColumnPtr res_column;
- ColumnPtr argument_column =
remove_nullable(block.get_by_position(arguments[0]).column);
- if (is_nullable) {
- auto null_map = ColumnUInt8::create(input_rows_count, 0);
- auto data_col = assert_cast<const
ColumnType*>(argument_column.get());
- res_column = ResultColumnType::create(input_rows_count);
- execute_straight(
- input_rows_count, data_col->get_data(),
-
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
+ auto data_col = assert_cast<const ColumnType*>(argument_column.get());
+ auto res_column = ResultColumnType::create(input_rows_count);
+ execute_straight(
+ input_rows_count, data_col->get_data(),
+
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data(),
+ result_null_map);
+ if (is_nullable) {
block.replace_by_position(result,
- ColumnNullable::create(res_column,
std::move(null_map)));
+
ColumnNullable::create(std::move(res_column),
+
std::move(result_null_map_column)));
} else {
- auto data_col = assert_cast<const
ColumnType*>(argument_column.get());
- res_column = ResultColumnType::create(input_rows_count);
- execute_straight(
- input_rows_count, data_col->get_data(),
-
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
block.replace_by_position(result, std::move(res_column));
}
return Status::OK();
@@ -1015,8 +1063,12 @@ struct LastDayImpl {
static void execute_straight(size_t input_rows_count,
const PaddedPODArray<NativeType>& data_col,
- PaddedPODArray<ResultNativeType>& res_data) {
- for (int i = 0; i < input_rows_count; i++) {
+ PaddedPODArray<ResultNativeType>& res_data,
+ const NullMap& null_map) {
+ for (size_t i = 0; i < input_rows_count; i++) {
+ if (null_map[i]) {
+ continue;
+ }
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
if (!ts_value.is_valid_date()) {
@@ -1065,36 +1117,35 @@ struct ToMondayImpl {
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
- size_t input_rows_count) {
+ size_t input_rows_count, const ColumnPtr&
argument_column,
+ NullMap& result_null_map,
+ ColumnUInt8::MutablePtr result_null_map_column)
{
const auto is_nullable =
block.get_by_position(result).type->is_nullable();
- ColumnPtr argument_column =
remove_nullable(block.get_by_position(arguments[0]).column);
- ColumnPtr res_column;
- if (is_nullable) {
- auto null_map = ColumnUInt8::create(input_rows_count, 0);
- auto data_col = assert_cast<const
ColumnType*>(argument_column.get());
- res_column = ResultColumnType::create(input_rows_count);
- execute_straight(
- input_rows_count, data_col->get_data(),
-
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
+ auto data_col = assert_cast<const ColumnType*>(argument_column.get());
+ auto res_column = ResultColumnType::create(input_rows_count);
+ execute_straight(
+ input_rows_count, data_col->get_data(),
+
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data(),
+ result_null_map);
+ if (is_nullable) {
block.replace_by_position(result,
- ColumnNullable::create(res_column,
std::move(null_map)));
+
ColumnNullable::create(std::move(res_column),
+
std::move(result_null_map_column)));
} else {
- auto data_col = assert_cast<const
ColumnType*>(argument_column.get());
- res_column = ResultColumnType::create(input_rows_count);
- execute_straight(
- input_rows_count, data_col->get_data(),
-
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
block.replace_by_position(result, std::move(res_column));
}
return Status::OK();
}
- // v1, throws on invalid date
static void execute_straight(size_t input_rows_count,
const PaddedPODArray<NativeType>& data_col,
- PaddedPODArray<ResultNativeType>& res_data) {
- for (int i = 0; i < input_rows_count; i++) {
+ PaddedPODArray<ResultNativeType>& res_data,
+ const NullMap& null_map) {
+ for (size_t i = 0; i < input_rows_count; i++) {
+ if (null_map[i]) {
+ continue;
+ }
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
if (!ts_value.is_valid_date()) [[unlikely]] {
diff --git a/regression-test/data/correctness_p0/test_date_trunc_error.out
b/regression-test/data/correctness_p0/test_date_trunc_error.out
new file mode 100644
index 00000000000..5ba0c61f613
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_date_trunc_error.out
@@ -0,0 +1,6 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 match 2024-01-15T10:23:45 2024-01-15T00:00 2024-01-31
2024-01-15 2024-01-15T10:23:45 1705285425.000000
+2 no_match \N \N \N \N \N \N
+3 match 2024-02-20T08:00 2024-02-20T00:00 2024-02-29
2024-02-19 2024-02-20T08:00 1708387200.000000
+
diff --git a/regression-test/suites/correctness_p0/test_date_trunc_error.groovy
b/regression-test/suites/correctness_p0/test_date_trunc_error.groovy
new file mode 100644
index 00000000000..68f770469fa
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_date_trunc_error.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_datelike_false_alarm") {
+ sql "DROP TABLE IF EXISTS dt_t_left;"
+ sql """
+ CREATE TABLE dt_t_left (
+ id INT,
+ name STRING
+ )
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql "DROP TABLE IF EXISTS dt_t_right;"
+ sql """
+ CREATE TABLE dt_t_right (
+ id INT,
+ event_time DATETIME
+ )
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """
+ INSERT INTO dt_t_left VALUES
+ (1, 'match'),
+ (2, 'no_match'),
+ (3, 'match');
+ """
+ sql """
+ INSERT INTO dt_t_right VALUES
+ (1, '2024-01-15 10:23:45'),
+ (3, '2024-02-20 08:00:00');
+ """
+ sql "DROP TABLE IF EXISTS dt_one_row;"
+ sql """
+ CREATE TABLE dt_one_row (
+ k INT
+ )
+ DISTRIBUTED BY HASH(k) BUCKETS 1
+ PROPERTIES ("replication_num" = "1");
+ """
+ sql "INSERT INTO dt_one_row VALUES (1);"
+
+ qt_sql """
+ SELECT
+ t.id,
+ t.name,
+ t.event_time,
+ date_trunc('day', t.event_time) AS trunc_day,
+ last_day(t.event_time) AS last_day,
+ to_monday(t.event_time) AS to_monday,
+ from_microsecond( unix_timestamp(t.event_time) * 1000000 ) AS
microsecond,
+ unix_timestamp( CAST(t.event_time AS VARCHAR), "%Y-%m-%d %H:%i:%s"
) AS unix_timestamp
+ FROM (
+ SELECT
+ l.id,
+ l.name,
+ r.event_time
+ FROM dt_t_left l
+ LEFT JOIN dt_t_right r
+ ON l.id = r.id
+ ) t
+ LEFT JOIN dt_one_row o
+ ON o.k = 1
+ ORDER BY t.id;
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]