IMPALA-4546: Fix Moscow timezone conversion after 2014 In 2014 Moscow timezone rules changed from UTC+3 with no DST to UTC+4 with no DST. A special case has been added to timestamp functions to handle this.
Testing: Added BE Expr tests. Change-Id: Id6e3f2c9f6ba29749a26bc1087e664637bc02528 Reviewed-on: http://gerrit.cloudera.org:8080/5969 Reviewed-by: Taras Bobrovytsky <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/545bfcfe Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/545bfcfe Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/545bfcfe Branch: refs/heads/master Commit: 545bfcfe673dd24356eb8f84b664b03b6230b01e Parents: b48bdaa Author: Taras Bobrovytsky <[email protected]> Authored: Thu Feb 9 20:53:31 2017 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Fri Mar 3 00:41:35 2017 +0000 ---------------------------------------------------------------------- be/src/exprs/expr-test.cc | 80 +++++++++++++++++++++++--------- be/src/exprs/timestamp-functions.cc | 61 +++++++++++++++++------- be/src/exprs/timezone_db.cc | 4 +- be/src/exprs/timezone_db.h | 15 ++++-- 4 files changed, 115 insertions(+), 45 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/expr-test.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index 604644e..0ca2da2 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -4416,28 +4416,66 @@ TEST_F(ExprTest, UnaryOperators) { TestValue("-1 & 8", TYPE_TINYINT, 8); } -// TODO: I think a lot of these casts are not necessary and we should fix this -TEST_F(ExprTest, TimestampFunctions) { - // Regression test for IMPALA-4209 - TestStringValue("cast(from_utc_timestamp(cast(1301180400 as timestamp)," - "'Europe/Moscow') as string)", "2011-03-27 03:00:00"); - TestStringValue("cast(from_utc_timestamp(cast(1301180399 as timestamp)," - "'Europe/Moscow') as string)", "2011-03-27 01:59:59"); - TestStringValue("cast(from_utc_timestamp(cast(1288404000 as timestamp)," - "'Europe/Moscow') as string)", "2010-10-30 06:00:00"); - TestStringValue("cast(from_utc_timestamp(cast(1288584000 as timestamp)," - "'Europe/Moscow') as string)", "2010-11-01 07:00:00"); - TestStringValue("cast(from_utc_timestamp(cast(1301104740 as timestamp)," - "'Europe/Moscow') as string)", "2011-03-26 04:59:00"); - TestStringValue("cast(from_utc_timestamp(cast(1301277600 as timestamp)," - "'Europe/Moscow') as string)", "2011-03-28 06:00:00"); - TestStringValue("cast(from_utc_timestamp(cast(1324947600 as timestamp)," - "'Europe/Moscow') as string)", "2011-12-27 05:00:00"); - TestStringValue("cast(from_utc_timestamp(cast(1325725200 as timestamp)," - "'Europe/Moscow') as string)", "2012-01-05 05:00:00"); - TestStringValue("cast(from_utc_timestamp(cast(1333594800 as timestamp)," - "'Europe/Moscow') as string)", "2012-04-05 07:00:00"); +TEST_F(ExprTest, MoscowTimezoneConversion) { +#pragma push_macro("UTC_TO_MSC") +#pragma push_macro("MSC_TO_UTC") +#define UTC_TO_MSC(X) ("cast(from_utc_timestamp('" X "', 'Europe/Moscow') as string)") +#define MSC_TO_UTC(X) ("cast(to_utc_timestamp('" X "', 'Europe/Moscow') as string)") + + // IMPALA-4209: Moscow time change in 2011. + // Last DST change before the transition. + TestStringValue(UTC_TO_MSC("2010-10-30 22:59:59"), "2010-10-31 02:59:59"); + TestStringValue(UTC_TO_MSC("2010-10-30 23:00:00"), "2010-10-31 02:00:00"); + TestStringValue(MSC_TO_UTC("2010-10-31 01:59:59"), "2010-10-30 21:59:59"); + // Since 2am to 2:59:59.999...am MSC happens twice, the ambiguity gets resolved by + // returning null. + TestIsNull(MSC_TO_UTC("2010-10-31 02:00:00"), TYPE_STRING); + TestIsNull(MSC_TO_UTC("2010-10-31 02:59:59"), TYPE_STRING); + TestStringValue(MSC_TO_UTC("2010-10-31 03:00:00"), "2010-10-31 00:00:00"); + + // Moscow time transitions to UTC+4. + TestStringValue(UTC_TO_MSC("2011-03-26 22:59:59"), "2011-03-27 01:59:59"); + TestStringValue(UTC_TO_MSC("2011-03-26 23:00:00"), "2011-03-27 03:00:00"); + TestStringValue(MSC_TO_UTC("2011-03-27 01:59:59"), "2011-03-26 22:59:59"); + // Since 2am to 2:59:59.999...am MSC happens twice, the ambiguity gets resolved by + // returning null. + TestIsNull(MSC_TO_UTC("2011-03-27 02:00:00"), TYPE_STRING); + TestIsNull(MSC_TO_UTC("2011-03-27 02:59:59"), TYPE_STRING); + TestStringValue(MSC_TO_UTC("2011-03-27 03:00:00"), "2011-03-26 23:00:00"); + + // No more DST after the transition. + TestStringValue(UTC_TO_MSC("2011-12-20 09:00:00"), "2011-12-20 13:00:00"); + TestStringValue(UTC_TO_MSC("2012-06-20 09:00:00"), "2012-06-20 13:00:00"); + TestStringValue(UTC_TO_MSC("2012-12-20 09:00:00"), "2012-12-20 13:00:00"); + TestStringValue(MSC_TO_UTC("2011-12-20 13:00:00"), "2011-12-20 09:00:00"); + TestStringValue(MSC_TO_UTC("2012-06-20 13:00:00"), "2012-06-20 09:00:00"); + TestStringValue(MSC_TO_UTC("2012-12-20 13:00:00"), "2012-12-20 09:00:00"); + + // IMPALA-4546: Moscow time change in 2014. + // UTC+4 is changed to UTC+3 + TestStringValue(UTC_TO_MSC("2014-10-25 21:59:59"), "2014-10-26 01:59:59"); + TestStringValue(UTC_TO_MSC("2014-10-25 22:00:00"), "2014-10-26 01:00:00"); + TestStringValue(UTC_TO_MSC("2014-10-25 23:00:00"), "2014-10-26 02:00:00"); + TestStringValue(MSC_TO_UTC("2014-10-26 00:59:59"), "2014-10-25 20:59:59"); + // Since 1am to 1:59:59.999...am MSC happens twice, the ambiguity gets resolved by + // returning null. + TestIsNull(MSC_TO_UTC("2014-10-26 01:00:00"), TYPE_STRING); + TestIsNull(MSC_TO_UTC("2014-10-26 01:59:59"), TYPE_STRING); + TestStringValue(MSC_TO_UTC("2014-10-26 02:00:00"), "2014-10-25 23:00:00"); + + // Still no DST after the transition. + TestStringValue(UTC_TO_MSC("2014-12-20 09:00:00"), "2014-12-20 12:00:00"); + TestStringValue(UTC_TO_MSC("2015-06-20 09:00:00"), "2015-06-20 12:00:00"); + TestStringValue(UTC_TO_MSC("2015-12-20 09:00:00"), "2015-12-20 12:00:00"); + TestStringValue(MSC_TO_UTC("2014-12-20 12:00:00"), "2014-12-20 09:00:00"); + TestStringValue(MSC_TO_UTC("2015-06-20 12:00:00"), "2015-06-20 09:00:00"); + TestStringValue(MSC_TO_UTC("2015-12-20 12:00:00"), "2015-12-20 09:00:00"); + +#pragma pop_macro("MSC_TO_UTC") +#pragma pop_macro("UTC_TO_MSC") +} +TEST_F(ExprTest, TimestampFunctions) { // Regression for IMPALA-1105 TestIsNull("cast(cast('NOTATIMESTAMP' as timestamp) as string)", TYPE_STRING); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/timestamp-functions.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc index a63438c..89bb498 100644 --- a/be/src/exprs/timestamp-functions.cc +++ b/be/src/exprs/timestamp-functions.cc @@ -85,7 +85,7 @@ TimestampVal TimestampFunctions::FromUtc(FunctionContext* context, const StringValue& tz_string_value = StringValue::FromStringVal(tz_string_val); time_zone_ptr timezone = TimezoneDatabase::FindTimezone( - string(tz_string_value.ptr, tz_string_value.len), ts_value); + string(tz_string_value.ptr, tz_string_value.len), ts_value, true); if (timezone == NULL) { // This should return null. Hive just ignores it. stringstream ss; @@ -123,15 +123,25 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext* context, const StringValue& tz_string_value = StringValue::FromStringVal(tz_string_val); time_zone_ptr timezone = TimezoneDatabase::FindTimezone( - string(tz_string_value.ptr, tz_string_value.len), ts_value); + string(tz_string_value.ptr, tz_string_value.len), ts_value, false); // This should raise some sort of error or at least null. Hive Just ignores it. - if (timezone == NULL) { + if (UNLIKELY(timezone == NULL)) { stringstream ss; ss << "Unknown timezone '" << tz_string_value << "'" << endl; context->AddWarning(ss.str().c_str()); return ts_val; } + if (UNLIKELY(timezone == TimezoneDatabase::TIMEZONE_MSK_PRE_2014 && + ts_value.date().day_number() == 2456957 && + ts_value.time().hours() >= 1)) { + // On October 27, 2014 at 1:00 am MSC, Moscow time transitions from UTC+4 with no DST + // to UTC+3 with no DST. Because of this, 1am to 1:59:59.999...am MSC happens twice. + // We want to be consistent with the existing rule of "in case of ambiguity return + // NULL". + DCHECK_LT(ts_value.time().hours(), 2); + return TimestampVal::null(); + } try { local_date_time lt(ts_value.date(), ts_value.time(), timezone, local_date_time::NOT_DATE_TIME_ON_ERROR); @@ -186,21 +196,36 @@ void TimestampFunctions::UnixAndFromUnixClose(FunctionContext* context, } } -time_zone_ptr TimezoneDatabase::FindTimezone(const string& tz, const TimestampValue& tv) { - // The backing database does not capture some subtleties, there are special cases - if ((tv.date().year() < 2011 || (tv.date().year() == 2011 && tv.date().month() < 4)) && - (iequals("Europe/Moscow", tz) || iequals("Moscow", tz) || iequals("MSK", tz))) { - // We transition in pre April 2011 from using the tz_database_ to a custom rule - // Russia stopped using daylight savings in 2011, the tz_database_ is - // set up assuming Russia uses daylight saving every year. - // Sun, Mar 27, 2:00AM Moscow clocks moved forward +1 hour (a total of GMT +4) - // Specifically, - // UTC Time 26 Mar 2011 22:59:59 +0000 ===> Sun Mar 27 01:59:59 MSK 2011 - // UTC Time 26 Mar 2011 23:00:00 +0000 ===> Sun Mar 27 03:00:00 MSK 2011 - // This means in 2011, The database rule will apply DST starting March 26 2011. - // This will be a correct +4 offset, and the database rule can apply until - // Oct 31 when tz_database_ will incorrectly attempt to turn clocks backwards 1 hour. - return TIMEZONE_MSK_PRE_2011_DST; +time_zone_ptr TimezoneDatabase::FindTimezone( + const string& tz, const TimestampValue& tv, bool tv_in_utc) { + // The backing database does not handle timezone rule changes. + if (iequals("Europe/Moscow", tz) || iequals("Moscow", tz) || iequals("MSK", tz)) { + if (tv.date().year() < 2011 || (tv.date().year() == 2011 && tv.date().month() < 4)) { + // Between January 19, 1992 and March 27, 2011 Moscow time was UTC+3 with DST. On + // March 27, 2011 Moscow time transitioned to UTC+4 with no DST. NOTE: We currently + // do not handle Moscow time conversions for dates before January 19, 1992 + // correctly (Impala incorrectly thinks the Moscow timezone is UTC+3 with DST + // instead of UTC+2 with DST for those dates). + return TIMEZONE_MSK_PRE_2011_DST; + } + // On October 26, 2014 at 22:00:00 UTC, Moscow time transitioned to UTC+3 with no + // DST. We have to make a precise time check here, unlike in the case above, because + // we can't rely on the timezone database to handle the moment of transition because + // the rule change does not coincide with a DST change. + const int MSK_TRANSITION_DAY = 2456956; + const int MSK_TRANSITION_HOUR_UTC = 22; + const int MSK_UTC_OFFSET = 4; + if (tv.date().day_number() < MSK_TRANSITION_DAY) return TIMEZONE_MSK_PRE_2014; + if (tv_in_utc) { + if (tv.date().day_number() == MSK_TRANSITION_DAY && + tv.time().hours() < MSK_TRANSITION_HOUR_UTC) { + return TIMEZONE_MSK_PRE_2014; + } + } else if (tv.date().day_number() < MSK_TRANSITION_DAY + 1 || ( + tv.date().day_number() == MSK_TRANSITION_DAY + 1 && + tv.time().hours() < (MSK_TRANSITION_HOUR_UTC + MSK_UTC_OFFSET) % 24)) { + return TIMEZONE_MSK_PRE_2014; + } } // See if they specified a zone id http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/timezone_db.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/timezone_db.cc b/be/src/exprs/timezone_db.cc index ae58f86..c7ae888 100644 --- a/be/src/exprs/timezone_db.cc +++ b/be/src/exprs/timezone_db.cc @@ -36,6 +36,8 @@ vector<string> TimezoneDatabase::tz_region_list_; const time_zone_ptr TimezoneDatabase::TIMEZONE_MSK_PRE_2011_DST(time_zone_ptr( new posix_time_zone(string("MSK+03MSK+01,M3.5.0/02:00:00,M10.5.0/03:00:00")))); +const time_zone_ptr TimezoneDatabase::TIMEZONE_MSK_PRE_2014(time_zone_ptr( + new posix_time_zone(string("MSK+04")))); const char* TimezoneDatabase::TIMEZONE_DATABASE_STR = "\"ID\",\"STD ABBR\",\"STD NAME\",\"DST ABBR\",\"DST NAME\",\"GMT offset\",\"DST adjustment\",\"DST Start Date rule\",\"Start time\",\"DST End date rule\",\"End time\"\n\ \"ACT\",\"ACST\",\"Australian Central Standard Time (Northern Territory)\",\"\",\"\",\"+09:30:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\ @@ -502,7 +504,7 @@ const char* TimezoneDatabase::TIMEZONE_DATABASE_STR = "\"ID\",\"STD ABBR\",\"STD \"Europe/Mariehamn\",\"EET\",\"Eastern European Time\",\"EEST\",\"Eastern European Summer Time\",\"+02:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\ \"Europe/Minsk\",\"MSK\",\"Moscow Standard Time\",\"\",\"\",\"+03:00:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\ \"Europe/Monaco\",\"CET\",\"Central European Time\",\"CEST\",\"Central European Summer Time\",\"+01:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\ -\"Europe/Moscow\",\"MSK\",\"Moscow Standard Time\",\"\",\"\",\"+04:00:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\ +\"Europe/Moscow\",\"MSK\",\"Moscow Standard Time\",\"\",\"\",\"+03:00:00\",\"+00:00:00\",\"\",\"\",\"\",\"\"\n\ \"Europe/Nicosia\",\"EET\",\"Eastern European Time\",\"EEST\",\"Eastern European Summer Time\",\"+02:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\ \"Europe/Oslo\",\"CET\",\"Central European Time\",\"CEST\",\"Central European Summer Time\",\"+01:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\ \"Europe/Paris\",\"CET\",\"Central European Time\",\"CEST\",\"Central European Summer Time\",\"+01:00:00\",\"+01:00:00\",\"-1;0;3\",\"+01:00:00\",\"-1;0;10\",\"+01:00:00\"\n\ http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/545bfcfe/be/src/exprs/timezone_db.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/timezone_db.h b/be/src/exprs/timezone_db.h index 76d08d5..20ea205 100644 --- a/be/src/exprs/timezone_db.h +++ b/be/src/exprs/timezone_db.h @@ -34,15 +34,20 @@ class TimezoneDatabase { /// Set up the static timezone database. static Status Initialize(); - /// Converts the name of a timezone to a boost timezone object. - /// Some countries change their timezones, the tiemstamp is required to correctly - /// determine the timezone information. + /// Converts the name of a timezone to a boost timezone object. In some cases, the + /// timestamp is required to determine the timezone because occasionally timezone + /// rules change. The flag "tv_in_utc" specifies whether the given timestamp value is + /// in the UTC timezone. If the flag is false, then the timestamp value is assumed to + /// be in the 'tz' timezone. static boost::local_time::time_zone_ptr FindTimezone(const std::string& tz, - const TimestampValue& tv); + const TimestampValue& tv, bool tv_in_utc); - /// Moscow Timezone No Daylight Savings Time (GMT+4), for use after March 2011 + /// Moscow timezone UTC+3 with DST, for use before March 27, 2011. static const boost::local_time::time_zone_ptr TIMEZONE_MSK_PRE_2011_DST; + /// Moscow timezone UTC+4 with no DST, for use before October 26, 2014. + static const boost::local_time::time_zone_ptr TIMEZONE_MSK_PRE_2014; + private: static const char* TIMEZONE_DATABASE_STR; static boost::local_time::tz_database tz_database_;
