This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new a7d727b15c4 branch-4.0: [Fix](function) avoid false alarm of some 
datelike functions #59897 (#59931)
a7d727b15c4 is described below

commit a7d727b15c4c50db4e31dd8f3729293fb81773b6
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 16 10:39:34 2026 +0800

    branch-4.0: [Fix](function) avoid false alarm of some datelike functions 
#59897 (#59931)
    
    Cherry-picked from #59897
    
    Co-authored-by: zclllyybb <[email protected]>
---
 .../function_date_or_datetime_computation.h        |  32 +++-
 .../vec/functions/function_other_types_to_date.cpp | 165 ++++++++++++++-------
 .../data/correctness_p0/test_date_trunc_error.out  |   6 +
 .../correctness_p0/test_date_trunc_error.groovy    |  85 +++++++++++
 4 files changed, 227 insertions(+), 61 deletions(-)

diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h 
b/be/src/vec/functions/function_date_or_datetime_computation.h
index c6b3b5a9899..ec1c6ff36b3 100644
--- a/be/src/vec/functions/function_date_or_datetime_computation.h
+++ b/be/src/vec/functions/function_date_or_datetime_computation.h
@@ -1127,16 +1127,34 @@ struct TimestampToDateTime : IFunction {
 
     static FunctionPtr create() { return 
std::make_shared<TimestampToDateTime<Impl>>(); }
 
+    // Handle nulls manually to prevent invalid default values from causing 
errors
+    bool use_default_implementation_for_nulls() const override { return false; 
}
+
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         uint32_t result, size_t input_rows_count) const 
override {
-        const auto& arg_col = block.get_by_position(arguments[0]).column;
-        const auto& column_data = assert_cast<const ColumnInt64&>(*arg_col);
+        // Handle null map manually
+        auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
+        NullMap& result_null_map = 
assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();
+
+        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
+        const NullMap* null_map = 
VectorizedUtils::get_null_map(argument_column);
+        if (null_map) {
+            VectorizedUtils::update_null_map(result_null_map, *null_map);
+        }
+
+        // Extract nested column
+        argument_column = remove_nullable(argument_column);
+
+        const auto& column_data = assert_cast<const 
ColumnInt64&>(*argument_column);
         auto res_col = ColumnDateTimeV2::create();
         res_col->get_data().resize_fill(input_rows_count, 0);
         auto& res_data = res_col->get_data();
         const cctz::time_zone& time_zone = context->state()->timezone_obj();
 
-        for (int i = 0; i < input_rows_count; ++i) {
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (result_null_map[i]) {
+                continue;
+            }
             Int64 value = column_data.get_element(i);
             if (value < 0) [[unlikely]] {
                 throw_out_of_bound_int(name, value);
@@ -1151,7 +1169,13 @@ struct TimestampToDateTime : IFunction {
             dt.set_microsecond((value % Impl::ratio) * ratio_to_micro);
         }
 
-        block.replace_by_position(result, std::move(res_col));
+        if (null_map) {
+            block.replace_by_position(
+                    result,
+                    ColumnNullable::create(std::move(res_col), 
std::move(result_null_map_column)));
+        } else {
+            block.replace_by_position(result, std::move(res_col));
+        }
         return Status::OK();
     }
 };
diff --git a/be/src/vec/functions/function_other_types_to_date.cpp 
b/be/src/vec/functions/function_other_types_to_date.cpp
index ec6e506d091..347612b1350 100644
--- a/be/src/vec/functions/function_other_types_to_date.cpp
+++ b/be/src/vec/functions/function_other_types_to_date.cpp
@@ -545,9 +545,9 @@ private:
         auto& res = 
static_cast<ColumnType*>(result_column->assume_mutable().get())->get_data();
         for (size_t i = 0; i < input_rows_count; ++i) {
             auto dt = binary_cast<NativeType, DateValueType>(data[i]);
-            if (!dt.template datetime_trunc<Unit>()) {
-                throw_out_of_bound_one_date<DateValueType>(name, data[i]);
-            }
+            // datetime_trunc can only fail when dt is invalid, which is 
impossible, so we do not throw an error.
+            // This lets us use the default implementation for nulls without 
worrying about invalid nested values.
+            dt.template datetime_trunc<Unit>();
             res[i] = binary_cast<DateValueType, NativeType>(dt);
         }
     }
@@ -735,9 +735,7 @@ struct UnixTimeStampDateImpl {
                 const auto& ts_value =
                         reinterpret_cast<const 
DateV2Value<DateV2ValueType>&>(*source.data);
                 int64_t timestamp {};
-                const auto valid =
-                        ts_value.unix_timestamp(&timestamp, 
context->state()->timezone_obj());
-                DCHECK(valid);
+                ts_value.unix_timestamp(&timestamp, 
context->state()->timezone_obj());
                 col_result_data[i] = trim_timestamp(timestamp, NewVersion);
             }
             block.replace_by_position(result, std::move(col_result));
@@ -753,9 +751,7 @@ struct UnixTimeStampDateImpl {
                 const auto& ts_value =
                         reinterpret_cast<const 
DateV2Value<DateTimeV2ValueType>&>(*source.data);
                 std::pair<int64_t, int64_t> timestamp {};
-                const auto valid =
-                        ts_value.unix_timestamp(&timestamp, 
context->state()->timezone_obj());
-                DCHECK(valid);
+                ts_value.unix_timestamp(&timestamp, 
context->state()->timezone_obj());
 
                 auto [sec, ms] = trim_timestamp(timestamp, NewVersion);
                 col_result_data[i] =
@@ -770,12 +766,7 @@ struct UnixTimeStampDateImpl {
     }
 };
 
-template <typename DateType, bool NewVersion = false>
-struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl<DateType, 
NewVersion> {
-    static DataTypes get_variadic_argument_types() { return 
{std::make_shared<DateType>()}; }
-};
-
-// This impl doesn't use default impl to deal null value.
+// Handle nulls manually to prevent invalid default values from causing errors
 template <bool NewVersion = false>
 struct UnixTimeStampStrImpl {
     static DataTypes get_variadic_argument_types() {
@@ -789,9 +780,15 @@ struct UnixTimeStampStrImpl {
         return std::make_shared<DataTypeDecimal64>(16, 6);
     }
 
+    static bool use_default_implementation_for_nulls() { return false; }
+
     static Status execute_impl(FunctionContext* context, Block& block,
                                const ColumnNumbers& arguments, uint32_t result,
                                size_t input_rows_count) {
+        // Handle null map manually
+        auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
+        NullMap& result_null_map = 
assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();
+
         ColumnPtr col_left = nullptr, col_right = nullptr;
         bool source_const = false, format_const = false;
         std::tie(col_left, source_const) =
@@ -799,12 +796,31 @@ struct UnixTimeStampStrImpl {
         std::tie(col_right, format_const) =
                 unpack_if_const(block.get_by_position(arguments[1]).column);
 
+        // Update result null map from input null maps
+        const NullMap* null_map_left =
+                
VectorizedUtils::get_null_map(block.get_by_position(arguments[0]).column);
+        const NullMap* null_map_right =
+                
VectorizedUtils::get_null_map(block.get_by_position(arguments[1]).column);
+        if (null_map_left) {
+            VectorizedUtils::update_null_map(result_null_map, *null_map_left, 
source_const);
+        }
+        if (null_map_right) {
+            VectorizedUtils::update_null_map(result_null_map, *null_map_right, 
format_const);
+        }
+
+        // Extract nested columns
+        col_left = remove_nullable(col_left);
+        col_right = remove_nullable(col_right);
+
         auto col_result = ColumnDecimal64::create(input_rows_count, 6);
         auto& col_result_data = col_result->get_data();
 
         const auto* col_source = assert_cast<const 
ColumnString*>(col_left.get());
         const auto* col_format = assert_cast<const 
ColumnString*>(col_right.get());
-        for (int i = 0; i < input_rows_count; i++) {
+        for (size_t i = 0; i < input_rows_count; i++) {
+            if (result_null_map[i]) {
+                continue;
+            }
             StringRef source = col_source->get_data_at(index_check_const(i, 
source_const));
             StringRef fmt = col_format->get_data_at(index_check_const(i, 
format_const));
 
@@ -829,7 +845,13 @@ struct UnixTimeStampStrImpl {
             }
         }
 
-        block.replace_by_position(result, std::move(col_result));
+        if (null_map_left || null_map_right) {
+            block.replace_by_position(result,
+                                      
ColumnNullable::create(std::move(col_result),
+                                                             
std::move(result_null_map_column)));
+        } else {
+            block.replace_by_position(result, std::move(col_result));
+        }
 
         return Status::OK();
     }
@@ -855,6 +877,13 @@ public:
         return Impl::get_variadic_argument_types();
     }
 
+    bool use_default_implementation_for_nulls() const override {
+        if constexpr (requires { Impl::use_default_implementation_for_nulls(); 
}) {
+            return Impl::use_default_implementation_for_nulls();
+        }
+        return true;
+    }
+
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         uint32_t result, size_t input_rows_count) const 
override {
         return Impl::execute_impl(context, block, arguments, result, 
input_rows_count);
@@ -881,6 +910,13 @@ public:
         return Impl::get_variadic_argument_types();
     }
 
+    bool use_default_implementation_for_nulls() const override {
+        if constexpr (requires { Impl::use_default_implementation_for_nulls(); 
}) {
+            return Impl::use_default_implementation_for_nulls();
+        }
+        return true;
+    }
+
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         uint32_t result, size_t input_rows_count) const 
override {
         return Impl::execute_impl(context, block, arguments, result, 
input_rows_count);
@@ -966,11 +1002,27 @@ public:
         return {std::make_shared<typename 
PrimitiveTypeTraits<PType>::DataType>()};
     }
 
-    //ATTN: no need to replace null value now because last_day and to_monday 
both process boundary case well.
-    // may need to change if support more functions
+    // Handle nulls manually to prevent invalid default values from causing 
errors
+    bool use_default_implementation_for_nulls() const override { return false; 
}
+
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         uint32_t result, size_t input_rows_count) const 
override {
-        return Impl<PType>::execute_impl(context, block, arguments, result, 
input_rows_count);
+        // Handle null map manually - update result null map from input null 
maps upfront
+        auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
+        NullMap& result_null_map = 
assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();
+
+        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
+        const NullMap* null_map = 
VectorizedUtils::get_null_map(argument_column);
+        if (null_map) {
+            VectorizedUtils::update_null_map(result_null_map, *null_map);
+        }
+
+        // Extract nested column
+        argument_column = remove_nullable(argument_column);
+
+        return Impl<PType>::execute_impl(context, block, arguments, result, 
input_rows_count,
+                                         argument_column, result_null_map,
+                                         std::move(result_null_map_column));
     }
 };
 
@@ -988,26 +1040,22 @@ struct LastDayImpl {
 
     static Status execute_impl(FunctionContext* context, Block& block,
                                const ColumnNumbers& arguments, uint32_t result,
-                               size_t input_rows_count) {
+                               size_t input_rows_count, const ColumnPtr& 
argument_column,
+                               NullMap& result_null_map,
+                               ColumnUInt8::MutablePtr result_null_map_column) 
{
         const auto is_nullable = 
block.get_by_position(result).type->is_nullable();
-        ColumnPtr res_column;
-        ColumnPtr argument_column = 
remove_nullable(block.get_by_position(arguments[0]).column);
-        if (is_nullable) {
-            auto null_map = ColumnUInt8::create(input_rows_count, 0);
-            auto data_col = assert_cast<const 
ColumnType*>(argument_column.get());
-            res_column = ResultColumnType::create(input_rows_count);
-            execute_straight(
-                    input_rows_count, data_col->get_data(),
-                    
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
+        auto data_col = assert_cast<const ColumnType*>(argument_column.get());
+        auto res_column = ResultColumnType::create(input_rows_count);
+        execute_straight(
+                input_rows_count, data_col->get_data(),
+                
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data(),
+                result_null_map);
 
+        if (is_nullable) {
             block.replace_by_position(result,
-                                      ColumnNullable::create(res_column, 
std::move(null_map)));
+                                      
ColumnNullable::create(std::move(res_column),
+                                                             
std::move(result_null_map_column)));
         } else {
-            auto data_col = assert_cast<const 
ColumnType*>(argument_column.get());
-            res_column = ResultColumnType::create(input_rows_count);
-            execute_straight(
-                    input_rows_count, data_col->get_data(),
-                    
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
             block.replace_by_position(result, std::move(res_column));
         }
         return Status::OK();
@@ -1015,8 +1063,12 @@ struct LastDayImpl {
 
     static void execute_straight(size_t input_rows_count,
                                  const PaddedPODArray<NativeType>& data_col,
-                                 PaddedPODArray<ResultNativeType>& res_data) {
-        for (int i = 0; i < input_rows_count; i++) {
+                                 PaddedPODArray<ResultNativeType>& res_data,
+                                 const NullMap& null_map) {
+        for (size_t i = 0; i < input_rows_count; i++) {
+            if (null_map[i]) {
+                continue;
+            }
             const auto& cur_data = data_col[i];
             auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
             if (!ts_value.is_valid_date()) {
@@ -1065,36 +1117,35 @@ struct ToMondayImpl {
 
     static Status execute_impl(FunctionContext* context, Block& block,
                                const ColumnNumbers& arguments, uint32_t result,
-                               size_t input_rows_count) {
+                               size_t input_rows_count, const ColumnPtr& 
argument_column,
+                               NullMap& result_null_map,
+                               ColumnUInt8::MutablePtr result_null_map_column) 
{
         const auto is_nullable = 
block.get_by_position(result).type->is_nullable();
-        ColumnPtr argument_column = 
remove_nullable(block.get_by_position(arguments[0]).column);
-        ColumnPtr res_column;
-        if (is_nullable) {
-            auto null_map = ColumnUInt8::create(input_rows_count, 0);
-            auto data_col = assert_cast<const 
ColumnType*>(argument_column.get());
-            res_column = ResultColumnType::create(input_rows_count);
-            execute_straight(
-                    input_rows_count, data_col->get_data(),
-                    
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
+        auto data_col = assert_cast<const ColumnType*>(argument_column.get());
+        auto res_column = ResultColumnType::create(input_rows_count);
+        execute_straight(
+                input_rows_count, data_col->get_data(),
+                
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data(),
+                result_null_map);
 
+        if (is_nullable) {
             block.replace_by_position(result,
-                                      ColumnNullable::create(res_column, 
std::move(null_map)));
+                                      
ColumnNullable::create(std::move(res_column),
+                                                             
std::move(result_null_map_column)));
         } else {
-            auto data_col = assert_cast<const 
ColumnType*>(argument_column.get());
-            res_column = ResultColumnType::create(input_rows_count);
-            execute_straight(
-                    input_rows_count, data_col->get_data(),
-                    
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
             block.replace_by_position(result, std::move(res_column));
         }
         return Status::OK();
     }
 
-    // v1, throws on invalid date
     static void execute_straight(size_t input_rows_count,
                                  const PaddedPODArray<NativeType>& data_col,
-                                 PaddedPODArray<ResultNativeType>& res_data) {
-        for (int i = 0; i < input_rows_count; i++) {
+                                 PaddedPODArray<ResultNativeType>& res_data,
+                                 const NullMap& null_map) {
+        for (size_t i = 0; i < input_rows_count; i++) {
+            if (null_map[i]) {
+                continue;
+            }
             const auto& cur_data = data_col[i];
             auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
             if (!ts_value.is_valid_date()) [[unlikely]] {
diff --git a/regression-test/data/correctness_p0/test_date_trunc_error.out 
b/regression-test/data/correctness_p0/test_date_trunc_error.out
new file mode 100644
index 00000000000..5ba0c61f613
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_date_trunc_error.out
@@ -0,0 +1,6 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      match   2024-01-15T10:23:45     2024-01-15T00:00        2024-01-31      
2024-01-15      2024-01-15T10:23:45     1705285425.000000
+2      no_match        \N      \N      \N      \N      \N      \N
+3      match   2024-02-20T08:00        2024-02-20T00:00        2024-02-29      
2024-02-19      2024-02-20T08:00        1708387200.000000
+
diff --git a/regression-test/suites/correctness_p0/test_date_trunc_error.groovy 
b/regression-test/suites/correctness_p0/test_date_trunc_error.groovy
new file mode 100644
index 00000000000..68f770469fa
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_date_trunc_error.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_datelike_false_alarm") {
+    sql "DROP TABLE IF EXISTS dt_t_left;"
+    sql """
+        CREATE TABLE dt_t_left (
+            id INT,
+            name STRING
+        )
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+            "replication_num" = "1"
+        );
+    """
+    sql "DROP TABLE IF EXISTS dt_t_right;"
+    sql """
+        CREATE TABLE dt_t_right (
+            id INT,
+            event_time DATETIME
+        )
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+            "replication_num" = "1"
+        );
+    """
+    sql """
+        INSERT INTO dt_t_left VALUES
+        (1, 'match'),
+        (2, 'no_match'),
+        (3, 'match');
+    """
+    sql """ 
+        INSERT INTO dt_t_right VALUES
+        (1, '2024-01-15 10:23:45'),
+        (3, '2024-02-20 08:00:00');
+    """
+    sql "DROP TABLE IF EXISTS dt_one_row;"
+    sql """
+        CREATE TABLE dt_one_row (
+            k INT
+        )
+        DISTRIBUTED BY HASH(k) BUCKETS 1
+        PROPERTIES ("replication_num" = "1");
+    """
+    sql "INSERT INTO dt_one_row VALUES (1);"
+
+    qt_sql """
+        SELECT
+            t.id,
+            t.name,
+            t.event_time,
+            date_trunc('day', t.event_time) AS trunc_day,
+            last_day(t.event_time) AS last_day,
+            to_monday(t.event_time) AS to_monday,
+            from_microsecond( unix_timestamp(t.event_time) * 1000000 ) AS 
microsecond,
+            unix_timestamp( CAST(t.event_time AS VARCHAR), "%Y-%m-%d %H:%i:%s" 
) AS unix_timestamp
+        FROM (
+            SELECT
+                l.id,
+                l.name,
+                r.event_time
+            FROM dt_t_left l
+            LEFT JOIN dt_t_right r
+                ON l.id = r.id
+        ) t
+        LEFT JOIN dt_one_row o
+            ON o.k = 1
+        ORDER BY t.id;
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to