This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 08832d9f3a9 [Fix](exec) Fix date dict dead loop. (#25570)
08832d9f3a9 is described below
commit 08832d9f3a94acff981140279bba443b69751e14
Author: Qi Chen <[email protected]>
AuthorDate: Tue Oct 24 02:51:43 2023 +0800
[Fix](exec) Fix date dict dead loop. (#25570)
---
be/src/util/time_lut.cpp | 2 +-
be/src/vec/exec/format/orc/vorc_reader.cpp | 2 +-
be/src/vec/exec/format/orc/vorc_reader.h | 2 +-
be/src/vec/exec/format/parquet/decoder.cpp | 2 +-
be/src/vec/exec/format/parquet/decoder.h | 2 +-
be/src/vec/runtime/vdatetime_value.cpp | 26 +++--
be/src/vec/runtime/vdatetime_value.h | 18 +--
be/test/vec/runtime/vdatetime_value_test.cpp | 168 +++++++++++++++++++++++++++
8 files changed, 201 insertions(+), 21 deletions(-)
diff --git a/be/src/util/time_lut.cpp b/be/src/util/time_lut.cpp
index 9be2dec4fc7..616541d411f 100644
--- a/be/src/util/time_lut.cpp
+++ b/be/src/util/time_lut.cpp
@@ -96,7 +96,7 @@ uint8_t calc_weekday(uint64_t day_nr, bool
is_sunday_first_day) {
}
uint32_t calc_daynr(uint16_t year, uint8_t month, uint8_t day) {
- // date_day_offet_dict range from [1900-01-01, 2039-10-24]
+ // date_day_offset_dict covers the range [1900-01-01, 2039-12-31]
if (date_day_offset_dict::can_speed_up_calc_daynr(year) &&
LIKELY(date_day_offset_dict::get_dict_init())) {
return date_day_offset_dict::get().daynr(year, month, day);
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index c1228c9d454..212540c0d8e 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -149,7 +149,7 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState*
state,
TimezoneUtils::find_cctz_time_zone(ctz, _time_zone);
VecDateTimeValue t;
t.from_unixtime(0, ctz);
- _offset_days = t.day() == 31 ? 0 : 1;
+ _offset_days = t.day() == 31 ? -1 : 0; // -1 when unix time 0 falls on 1969-12-31 in this time zone.
_init_profile();
_init_system_properties();
_init_file_description();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h
b/be/src/vec/exec/format/orc/vorc_reader.h
index c41b8718d75..feb1b700626 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -500,7 +500,7 @@ private:
int64_t _range_size;
const std::string& _ctz;
const std::vector<std::string>* _column_names;
- size_t _offset_days = 0;
+ int32_t _offset_days = 0;
cctz::time_zone _time_zone;
std::list<std::string> _read_cols;
diff --git a/be/src/vec/exec/format/parquet/decoder.cpp
b/be/src/vec/exec/format/parquet/decoder.cpp
index bf8ef0b2330..0a158176091 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -181,7 +181,7 @@ void Decoder::init(FieldSchema* field_schema,
cctz::time_zone* ctz) {
if (_decode_params->ctz) {
VecDateTimeValue t;
t.from_unixtime(0, *_decode_params->ctz);
- _decode_params->offset_days = t.day() == 31 ? 0 : 1;
+ _decode_params->offset_days = t.day() == 31 ? -1 : 0; // -1 when
unix time 0 falls on 1969-12-31 in this time zone.
}
}
} // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/decoder.h
b/be/src/vec/exec/format/parquet/decoder.h
index 6c1030818cd..acd9965bad8 100644
--- a/be/src/vec/exec/format/parquet/decoder.h
+++ b/be/src/vec/exec/format/parquet/decoder.h
@@ -71,7 +71,7 @@ struct DecodeParams {
static const cctz::time_zone utc0;
// schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the
time zone
cctz::time_zone* ctz = nullptr;
- size_t offset_days = 0;
+ int32_t offset_days = 0;
int64_t second_mask = 1;
int64_t scale_to_nano_factor = 1;
DecimalScaleParams decimal_scale;
diff --git a/be/src/vec/runtime/vdatetime_value.cpp
b/be/src/vec/runtime/vdatetime_value.cpp
index 201548b1642..3e24c1ffe0a 100644
--- a/be/src/vec/runtime/vdatetime_value.cpp
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -2667,10 +2667,10 @@ template <typename T>
typename DateV2Value<T>::underlying_value DateV2Value<T>::to_date_int_val()
const {
return int_val_;
}
-
+// [1900-01-01, 2039-12-31]
static std::array<DateV2Value<DateV2ValueType>,
date_day_offset_dict::DICT_DAYS>
DATE_DAY_OFFSET_ITEMS;
-
+// [1900-01-01, 2039-12-31]
static std::array<std::array<std::array<int, 31>, 12>, 140>
DATE_DAY_OFFSET_DICT;
static bool DATE_DAY_OFFSET_ITEMS_INIT = false;
@@ -2687,19 +2687,27 @@ bool date_day_offset_dict::get_dict_init() {
date_day_offset_dict::date_day_offset_dict() {
DateV2Value<DateV2ValueType> d;
+ // Init days before epoch.
d.set_time(1969, 12, 31, 0, 0, 0, 0);
- for (int i = 0; i < DAY_AFTER_EPOCH; ++i) {
- DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + i] = d;
+ for (int i = 0; i < DAY_BEFORE_EPOCH; ++i) {
+ DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i - 1] = d;
DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() -
1] =
calc_daynr(d.year(), d.month(), d.day());
- d += 1;
+ d -= 1;
}
- d.set_time(1969, 12, 31, 0, 0, 0, 0);
- for (int i = 0; i <= DAY_BEFORE_EPOCH; ++i) {
- DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i] = d;
+ // Init epoch day.
+ d.set_time(1970, 1, 1, 0, 0, 0, 0);
+ DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH] = d;
+ DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] =
+ calc_daynr(d.year(), d.month(), d.day());
+ d += 1;
+
+ // Init days after epoch.
+ for (int i = 0; i < DAY_AFTER_EPOCH; ++i) {
+ DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + 1 + i] = d;
DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() -
1] =
calc_daynr(d.year(), d.month(), d.day());
- d -= 1;
+ d += 1;
}
DATE_DAY_OFFSET_ITEMS_INIT = true;
diff --git a/be/src/vec/runtime/vdatetime_value.h
b/be/src/vec/runtime/vdatetime_value.h
index 97c82f68bbe..b03c09a55d3 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -1516,6 +1516,9 @@ int64_t datetime_diff(const VecDateTimeValue& ts_value1,
const DateV2Value<T>& t
return 0;
}
+/**
+ * Date dict table. date range is [1900-01-01, 2039-12-31].
+ */
class date_day_offset_dict {
private:
static date_day_offset_dict instance;
@@ -1526,15 +1529,16 @@ private:
date_day_offset_dict& operator=(const date_day_offset_dict&) = default;
public:
- static constexpr int DAY_BEFORE_EPOCH = 25566; // 1900-01-01
- static constexpr int DAY_AFTER_EPOCH = 25500; // 2039-10-24
- static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + DAY_AFTER_EPOCH;
+ static constexpr int DAY_BEFORE_EPOCH = 25567;
// 1900-01-01
+ static constexpr int DAY_AFTER_EPOCH = 25566;
// 2039-12-31
+ static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + 1 + DAY_AFTER_EPOCH;
// 1 means 1970-01-01
- static constexpr int START_YEAR = 1900; //
1900-01-01
- static constexpr int END_YEAR = 2039; //
2039-10-24
- static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR = 719527; //
1969-12-31
+ static constexpr int START_YEAR = 1900; // 1900-01-01
+ static constexpr int END_YEAR = 2039; // 2039-12-31
+ static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR =
+ 719528; // daynr of 1970-01-01 (daynr counts from 0000-01-01, which
is daynr 1)
- static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR
&& year < END_YEAR; }
+ static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR
&& year <= END_YEAR; }
static int get_offset_by_daynr(int daynr) { return daynr -
DAY_OFFSET_CAL_START_POINT_DAYNR; }
diff --git a/be/test/vec/runtime/vdatetime_value_test.cpp
b/be/test/vec/runtime/vdatetime_value_test.cpp
index 05943dcc6c3..bb396b2ce6f 100644
--- a/be/test/vec/runtime/vdatetime_value_test.cpp
+++ b/be/test/vec/runtime/vdatetime_value_test.cpp
@@ -570,4 +570,172 @@ TEST(VDateTimeValueTest, date_v2_to_string_test) {
}
}
+TEST(VDateTimeValueTest, date_v2_daynr_test) {
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 1970/01/01
+ EXPECT_TRUE(date_v2.get_date_from_daynr(719528));
+ EXPECT_TRUE(date_v2.year() == 1970);
+ EXPECT_TRUE(date_v2.month() == 1);
+ EXPECT_TRUE(date_v2.day() == 1);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(1970, 1, 1) == 719528);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+ EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1970));
+
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(719528));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 1969/12/31
+ EXPECT_TRUE(date_v2.get_date_from_daynr(719527));
+ EXPECT_TRUE(date_v2.year() == 1969);
+ EXPECT_TRUE(date_v2.month() == 12);
+ EXPECT_TRUE(date_v2.day() == 31);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(1969, 12, 31) == 719527);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+ EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1969));
+
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(719527));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 1900/01/01
+ EXPECT_TRUE(date_v2.get_date_from_daynr(693961));
+ EXPECT_TRUE(date_v2.year() == 1900);
+ EXPECT_TRUE(date_v2.month() == 1);
+ EXPECT_TRUE(date_v2.day() == 1);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(1900, 1, 1) == 693961);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+ EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1900));
+
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(693961));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 1899/12/31
+ EXPECT_TRUE(date_v2.get_date_from_daynr(693960));
+ EXPECT_TRUE(date_v2.year() == 1899);
+ EXPECT_TRUE(date_v2.month() == 12);
+ EXPECT_TRUE(date_v2.day() == 31);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(1899, 12, 31) == 693960);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(1899));
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(693960));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 2039/12/31
+ EXPECT_TRUE(date_v2.get_date_from_daynr(745094));
+ EXPECT_TRUE(date_v2.year() == 2039);
+ EXPECT_TRUE(date_v2.month() == 12);
+ EXPECT_TRUE(date_v2.day() == 31);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(2039, 12, 31) == 745094);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+ EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(2039));
+
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(745094));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 2040/01/01
+ EXPECT_TRUE(date_v2.get_date_from_daynr(745095));
+ EXPECT_TRUE(date_v2.year() == 2040);
+ EXPECT_TRUE(date_v2.month() == 1);
+ EXPECT_TRUE(date_v2.day() == 1);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(2040, 01, 01) == 745095);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(2040));
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(745095));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 0000/01/01
+ EXPECT_TRUE(date_v2.get_date_from_daynr(1));
+ EXPECT_TRUE(date_v2.year() == 0);
+ EXPECT_TRUE(date_v2.month() == 1);
+ EXPECT_TRUE(date_v2.day() == 1);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(0, 01, 01) == 1);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+ EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(0));
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(1));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // Default-constructed value reads as 0000/00/00; invalid date 0000/00/01 maps to daynr 0
+ EXPECT_TRUE(date_v2.year() == 0);
+ EXPECT_TRUE(date_v2.month() == 0);
+ EXPECT_TRUE(date_v2.day() == 0);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(0, 0, 1) == 0);
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // 9999/12/31
+ EXPECT_TRUE(date_v2.get_date_from_daynr(3652424));
+ EXPECT_TRUE(date_v2.year() == 9999);
+ EXPECT_TRUE(date_v2.month() == 12);
+ EXPECT_TRUE(date_v2.day() == 31);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(9999, 12, 31) == 3652424);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(9999));
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(3652424));
+ }
+
+ {
+ DateV2Value<DateV2ValueType> date_v2;
+ // Invalid date 10000/01/01
+ EXPECT_FALSE(date_v2.get_date_from_daynr(3652425));
+ EXPECT_TRUE(date_v2.year() == 0);
+ EXPECT_TRUE(date_v2.month() == 0);
+ EXPECT_TRUE(date_v2.day() == 0);
+ EXPECT_TRUE(date_v2.hour() == 0);
+ EXPECT_TRUE(date_v2.minute() == 0);
+ EXPECT_TRUE(date_v2.second() == 0);
+ EXPECT_TRUE(date_v2.microsecond() == 0);
+ EXPECT_TRUE(doris::calc_daynr(10000, 01, 01) == 3652425);
+ EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(10000));
+
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(3652425));
+ }
+}
+
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]