This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0ae98f8316678e8028f6fa0d18ab3d393dcd8a6d
Author: zclllyybb <[email protected]>
AuthorDate: Wed Sep 20 14:28:12 2023 +0800

    [fix](timezone) fix timezone parse when there is no tzfile (#24578)
---
 be/src/runtime/runtime_state.h              |  1 +
 be/src/util/timezone_utils.cpp              | 21 +++++++---
 be/src/util/timezone_utils.h                |  4 ++
 be/src/vec/functions/function_convert_tz.h  |  2 +-
 be/test/testutil/function_utils.cpp         | 10 +++--
 be/test/testutil/function_utils.h           |  3 +-
 be/test/vec/function/function_test_util.h   |  7 +++-
 be/test/vec/function/function_time_test.cpp | 60 ++++++++++++++++++++++++++++-
 8 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index 9ef2eb3c7a..126e604629 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -84,6 +84,7 @@ public:
                 const TQueryGlobals& query_globals, ExecEnv* exec_env);
 
     // for ut and non-query.
+    void set_exec_env(ExecEnv* exec_env) { _exec_env = exec_env; }
     void init_mem_trackers(const TUniqueId& id = TUniqueId(), const std::string& name = "unknown");
 
     const TQueryOptions& query_options() const { return _query_options; }
diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp
index 14dd8b7e53..be8c282b3a 100644
--- a/be/src/util/timezone_utils.cpp
+++ b/be/src/util/timezone_utils.cpp
@@ -43,6 +43,12 @@ std::unordered_map<std::string, std::string> TimezoneUtils::timezone_names_map_;
 bool TimezoneUtils::inited_ = false;
 
 const std::string TimezoneUtils::default_time_zone = "+08:00";
+static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change by TZDIR env var
+
+void TimezoneUtils::clear_timezone_names() {
+    timezone_names_map_.clear();
+    inited_ = false;
+}
 
 void TimezoneUtils::load_timezone_names() {
     if (inited_) {
@@ -51,7 +57,6 @@ void TimezoneUtils::load_timezone_names() {
 
     inited_ = true;
     std::string path;
-    const char* tzdir = "/usr/share/zoneinfo";
     char* tzdir_env = std::getenv("TZDIR");
     if (tzdir_env && *tzdir_env) {
         tzdir = tzdir_env;
@@ -210,7 +215,6 @@ void TimezoneUtils::load_timezones_to_cache(vectorized::ZoneList& cache_list) {
     cache_list["CST"] = cctz::fixed_time_zone(cctz::seconds(8 * 3600));
 
     std::string base_str;
-    const char* tzdir = "/usr/share/zoneinfo"; // default
     // try get from System
     char* tzdir_env = std::getenv("TZDIR");
     if (tzdir_env && *tzdir_env) {
@@ -221,6 +225,11 @@ void TimezoneUtils::load_timezones_to_cache(vectorized::ZoneList& cache_list) {
     base_str += '/';
 
     const auto root_path = std::filesystem::path {base_str};
+    if (!std::filesystem::exists(root_path)) {
+        LOG_WARNING("Cannot find system tzfile. Abandon to preload timezone cache.");
+        return;
+    }
+
     std::set<std::string> ignore_paths = {"posix", "right"}; // duplications
 
     for (std::filesystem::recursive_directory_iterator it {base_str}; it != end(it); it++) {
@@ -295,11 +304,11 @@ bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_
             tz_parsed = true;
         } else {
             auto it = timezone_names_map_.find(timezone_lower);
-            if (it == timezone_names_map_.end()) {
-                VLOG_DEBUG << "Illegal timezone " << timezone_lower;
-                return false;
+            if (it != timezone_names_map_.end()) {
+                tz_parsed = cctz::load_time_zone(it->second, &ctz);
+            } else {
+                tz_parsed = cctz::load_time_zone(timezone, &ctz);
             }
-            tz_parsed = cctz::load_time_zone(it->second, &ctz);
         }
         if (tz_parsed) {
             if (!have_both) { // GMT only
diff --git a/be/src/util/timezone_utils.h b/be/src/util/timezone_utils.h
index 0f3a6dcc38..55f7eace20 100644
--- a/be/src/util/timezone_utils.h
+++ b/be/src/util/timezone_utils.h
@@ -37,8 +37,12 @@ class TimezoneUtils {
 public:
     static void load_timezone_names();
     static void load_timezones_to_cache(vectorized::ZoneList& cache_list);
+    // we support to parse lower_case timezone name iff execution environment has timezone file
     static bool find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz);
 
+    // for ut only
+    static void clear_timezone_names();
+
     static const std::string default_time_zone;
 
 private:
diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h
index 8ff3505aca..d2db44c117 100644
--- a/be/src/vec/functions/function_convert_tz.h
+++ b/be/src/vec/functions/function_convert_tz.h
@@ -139,7 +139,7 @@ struct ConvertTZImpl {
             std::unique_lock<std::shared_mutex> lock_(cache_lock);
             //TODO: the lock upgrade could be done in find_... function only when we push value into the hashmap
             if (!TimezoneUtils::find_cctz_time_zone(from_tz, time_zone_cache[from_tz])) {
-                time_zone_cache.erase(to_tz);
+                time_zone_cache.erase(from_tz);
                 result_null_map[index_now] = true;
                 result_column->insert_default();
                 return;
diff --git a/be/test/testutil/function_utils.cpp b/be/test/testutil/function_utils.cpp
index 6a87bb7009..150d81f339 100644
--- a/be/test/testutil/function_utils.cpp
+++ b/be/test/testutil/function_utils.cpp
@@ -39,13 +39,17 @@ FunctionUtils::FunctionUtils() {
 
 FunctionUtils::FunctionUtils(const doris::TypeDescriptor& return_type,
                              const std::vector<doris::TypeDescriptor>& arg_types,
-                             int varargs_buffer_size) {
+                             int varargs_buffer_size, RuntimeState* state = nullptr) {
     TQueryGlobals globals;
     globals.__set_now_string("2019-08-06 01:38:57");
     globals.__set_timestamp_ms(1565026737805);
     globals.__set_time_zone("Asia/Shanghai");
-    _state = RuntimeState::create_unique(globals).release();
-    _fn_ctx = FunctionContext::create_context(_state, return_type, arg_types);
+    if (state == nullptr) {
+        _state = RuntimeState::create_unique(globals).release();
+        _fn_ctx = FunctionContext::create_context(_state, return_type, arg_types);
+    } else {
+        _fn_ctx = FunctionContext::create_context(state, return_type, arg_types);
+    }
 }
 
 FunctionUtils::~FunctionUtils() {
diff --git a/be/test/testutil/function_utils.h b/be/test/testutil/function_utils.h
index fbb641eb07..f952ae0ed2 100644
--- a/be/test/testutil/function_utils.h
+++ b/be/test/testutil/function_utils.h
@@ -29,7 +29,8 @@ class FunctionUtils {
 public:
     FunctionUtils();
     FunctionUtils(const doris::TypeDescriptor& return_type,
-                  const std::vector<doris::TypeDescriptor>& arg_types, int varargs_buffer_size);
+                  const std::vector<doris::TypeDescriptor>& arg_types, int varargs_buffer_size,
+                  RuntimeState*);
     ~FunctionUtils();
 
     doris::FunctionContext* get_fn_ctx() { return _fn_ctx.get(); }
diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h
index fc9fb8a60d..c543612439 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -29,6 +29,7 @@
 #include "gtest/gtest_pred_impl.h"
 #include "olap/olap_common.h"
 #include "runtime/define_primitive_type.h"
+#include "runtime/exec_env.h"
 #include "runtime/types.h"
 #include "testutil/any_type.h"
 #include "testutil/function_utils.h"
@@ -199,9 +200,11 @@ void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types
 // Null values are represented by Null()
 // The type of the constant column is represented as follows: Consted {TypeIndex::String}
 // A DataSet with a constant column can only have one row of data
+// If state != nullptr, should set query options you use for your own.
 template <typename ReturnType, bool nullable = false>
 Status check_function(const std::string& func_name, const InputTypeSet& input_types,
-                      const DataSet& data_set, bool expect_fail = false) {
+                      const DataSet& data_set, bool expect_fail = false,
+                      RuntimeState* state = nullptr) {
     // 1.0 create data type
     ut_type::UTDataTypeDescs descs;
     EXPECT_TRUE(parse_ut_data_type(input_types, descs));
@@ -270,7 +273,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty
         fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE;
     }
 
-    FunctionUtils fn_utils(fn_ctx_return, arg_types, 0);
+    FunctionUtils fn_utils(fn_ctx_return, arg_types, 0, state);
     auto* fn_ctx = fn_utils.get_fn_ctx();
     fn_ctx->set_constant_cols(constant_cols);
     func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp
index e6281bc8f4..ba4e352f82 100644
--- a/be/test/vec/function/function_time_test.cpp
+++ b/be/test/vec/function/function_time_test.cpp
@@ -23,12 +23,12 @@
 
 #include "common/status.h"
 #include "function_test_util.h"
-#include "gtest/gtest_pred_impl.h"
+#include "runtime/runtime_state.h"
 #include "testutil/any_type.h"
+#include "util/timezone_utils.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type_date.h"
 #include "vec/data_types/data_type_date_time.h"
-#include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
 #include "vec/data_types/data_type_time.h"
@@ -201,6 +201,62 @@ TEST(VTimestampFunctionsTest, timediff_test) {
     check_function<DataTypeTimeV2, true>(func_name, input_types, data_set);
 }
 
+TEST(VTimestampFunctionsTest, convert_tz_test) {
+    std::string func_name = "convert_tz";
+
+    ExecEnv* exec_env = ExecEnv::GetInstance();
+    exec_env->_global_zone_cache = std::make_unique<vectorized::ZoneList>();
+    auto test_state = RuntimeState::create_unique();
+    test_state->set_exec_env(exec_env);
+    TimezoneUtils::clear_timezone_names();
+
+    InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::String, TypeIndex::String};
+
+    {
+        DataSet data_set = {{{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/SHANGHAI"},
+                              std::string {"america/Los_angeles"}},
+                             Null()}};
+        check_function<DataTypeDateTimeV2, true>(func_name, input_types, data_set, false,
+                                                 test_state.get());
+    }
+
+    {
+        DataSet data_set = {{{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"},
+                              std::string {"UTC"}},
+                             str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")},
+                            {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"},
+                              std::string {"Utc"}},
+                             Null()},
+                            {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"},
+                              std::string {"UTC"}},
+                             str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")},
+                            {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/SHANGHAI"},
+                              std::string {"america/Los_angeles"}},
+                             Null()}};
+        check_function<DataTypeDateTimeV2, true>(func_name, input_types, data_set, false,
+                                                 test_state.get());
+    }
+
+    {
+        DataSet data_set = {{{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"},
+                              std::string {"UTC"}},
+                             str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")},
+                            {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"},
+                              std::string {"Utc"}},
+                             str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")},
+                            {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"},
+                              std::string {"UTC"}},
+                             str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")},
+                            {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/SHANGHAI"},
+                              std::string {"america/Los_angeles"}},
+                             str_to_datetime_v2("2019-07-31 11:18:27", "%Y-%m-%d %H:%i:%s.%f")}};
+        TimezoneUtils::load_timezone_names();
+        TimezoneUtils::load_timezones_to_cache(*exec_env->_global_zone_cache);
+        check_function<DataTypeDateTimeV2, true>(func_name, input_types, data_set, false,
+                                                 test_state.get());
+    }
+}
+
 TEST(VTimestampFunctionsTest, date_format_test) {
     std::string func_name = "date_format";
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to