yiguolei commented on code in PR #22694:
URL: https://github.com/apache/doris/pull/22694#discussion_r1287368055
##########
be/src/util/timezone_utils.cpp:
##########
@@ -63,6 +63,172 @@ void TimezoneUtils::load_timezone_names() {
}
}
}
+
+namespace { // functions use only in this file
+
+template <typename T>
+T swapEndianness(T value) {
+ constexpr int numBytes = sizeof(T);
+ T result = 0;
+ for (int i = 0; i < numBytes; ++i) {
+ result = (result << 8) | ((value >> (8 * i)) & 0xFF);
+ }
+ return result;
+}
+
+template <typename T>
+T next_from_charstream(int8_t*& src) {
+ T value = *reinterpret_cast<T*>(src);
+ src += sizeof(T) / sizeof(int8_t);
+ if constexpr (std::endian::native == std::endian::little) {
+ return swapEndianness(
+ value); // timezone information files use network endianess,
which is big-endian
+ } else if (std::endian::native == std::endian::big) {
+ return value;
+ } else {
+ __builtin_unreachable();
+ }
+}
+
+std::pair<int8_t*, int> load_file_to_memory(const std::string& path) {
+ int fd = open(path.c_str(), O_RDONLY);
+ int len = lseek(fd, 0, SEEK_END); // bytes
+
+ int8_t* addr = (int8_t*)mmap(nullptr, len, PROT_READ, MAP_PRIVATE, fd, 0);
+ int8_t* data = new int8_t[len];
+ memcpy(data, addr, len);
+ close(fd);
+ munmap(addr, len);
+
+ return {data, len};
+}
+
+struct alignas(alignof(uint8_t)) ttinfo {
+ uint8_t tt_utoff[4]; // need force cast to int32_t
+ uint8_t tt_isdst;
+ uint8_t tt_desigidx;
+};
+constexpr static int TTINFO_SIZE = sizeof(ttinfo);
+static_assert(TTINFO_SIZE == 6);
+
+struct real_ttinfo {
+ [[maybe_unused]] real_ttinfo() = default; // actually it's used. how
stupid compiler!
+ real_ttinfo(const ttinfo& arg) {
+ diff_seconds = *reinterpret_cast<const int32_t*>(arg.tt_utoff + 0);
+ is_dst = arg.tt_isdst;
+ name_index = arg.tt_desigidx;
+ }
+
+ int32_t diff_seconds; // to UTC
+ bool is_dst;
+ uint8_t name_index;
+};
+
+template <>
+ttinfo next_from_charstream<ttinfo>(int8_t*& src) {
+ ttinfo value = *reinterpret_cast<ttinfo*>(src);
+ src += TTINFO_SIZE;
+ if constexpr (std::endian::native == std::endian::little) {
+ std::swap(value.tt_utoff[0], value.tt_utoff[3]);
+ std::swap(value.tt_utoff[1], value.tt_utoff[2]);
+ }
+ return value;
+}
+
+/*
+ * follow the rule of tzfile(5) which defined in
https://man7.org/linux/man-pages/man5/tzfile.5.html.
+ * should change when it changes.
+ */
+bool parse_load_timezone(vectorized::ZoneList& zone_list, int8_t* data, int
len,
+ bool first_time = true) {
+ int8_t* begin_pos = data;
+ /* HEADERS */
+ if (memcmp(data, "TZif", 4) != 0) [[unlikely]] { // magic number
+ return false;
+ }
+ data += 4;
+
+ // if version = 2, the whole header&data will repeat itself one time.
+ int8_t version = next_from_charstream<int8_t>(data) - '0';
+ data += 15; // null bits
+ int32_t ut_count = next_from_charstream<int32_t>(data);
+ int32_t wall_count = next_from_charstream<int32_t>(data);
+ int32_t leap_count = next_from_charstream<int32_t>(data);
+ int32_t trans_time_count = next_from_charstream<int32_t>(data);
+ int32_t type_count = next_from_charstream<int32_t>(data);
+ int32_t char_count = next_from_charstream<int32_t>(data);
+
+ /* HEADERS end, FIELDS begin*/
+ // transaction time points, which we don't need
+ data += (first_time ? 5 : 9) * trans_time_count;
+
+ // timezones
+ std::vector<real_ttinfo> timezones(type_count);
+ for (int i = 0; i < type_count; i++) {
+ ttinfo tz_data = next_from_charstream<ttinfo>(data);
+ timezones[i] = tz_data; // cast by c'tor
+ }
+
+ // timezone names
+ const char* name_zone = (char*)data;
+ data += char_count;
+
+ // concate names
+ for (auto& tz : timezones) {
+ int len = strlen(name_zone + tz.name_index);
+ zone_list.emplace(std::string {name_zone + tz.name_index, name_zone +
tz.name_index + len},
+
cctz::fixed_time_zone(cctz::seconds(tz.diff_seconds)));
+ }
+
+ // the second part.
+ if (version == 2 && first_time) {
+ // leap seconds, standard/wall indicators, UT/local indicators, which
we don't need
+ data += 4 * leap_count + wall_count + ut_count;
+
+ return (data < begin_pos + len) &&
+ parse_load_timezone(zone_list, data, len - (data - begin_pos),
false);
+ }
+
+ return true;
+}
+
+} // namespace
+
+void TimezoneUtils::load_timezones_to_cache(vectorized::ZoneList& cache_list) {
+ std::string base_str;
+ const char* tzdir = "/usr/share/zoneinfo"; // default
+ // try get from System
Review Comment:
you should make sure, both macos and linux are using /usr/share/zoneinfo
this file to save timezone info.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]