Revert "IMPALA-2716: Hive/Impala incompatibility for timestamp data in Parquet"
Reverting IMPALA-2716 as SparkSQL does not agree with the approach taken. More details can be found at: https://issues.apache.org/jira/browse/SPARK-12297 Change-Id: Ic66de277c622748540c1b9969152c2cabed1f3bd Reviewed-on: http://gerrit.cloudera.org:8080/6896 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/21f90633 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/21f90633 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/21f90633 Branch: refs/heads/master Commit: 21f90633044c727e81f467ccf04a217a709ab0b2 Parents: 691bbf0 Author: Attila Jeges <[email protected]> Authored: Tue May 16 17:48:36 2017 +0200 Committer: Impala Public Jenkins <[email protected]> Committed: Tue May 23 01:46:22 2017 +0000 ---------------------------------------------------------------------- be/src/benchmarks/CMakeLists.txt | 1 - .../benchmarks/convert-timestamp-benchmark.cc | 163 ------------------ be/src/exec/hdfs-scan-node-base.cc | 2 - be/src/exec/hdfs-scan-node-base.h | 6 - be/src/exec/parquet-column-readers.cc | 98 ++--------- be/src/exprs/timestamp-functions.cc | 43 ++--- be/src/exprs/timezone_db.h | 9 - be/src/runtime/timestamp-value.cc | 34 +--- be/src/runtime/timestamp-value.h | 13 +- be/src/service/fe-support.cc | 19 --- be/src/service/impala-server.cc | 4 - be/src/util/backend-gflag-util.cc | 3 - common/thrift/BackendGflags.thrift | 4 - common/thrift/PlanNodes.thrift | 5 - common/thrift/generate_error_codes.py | 5 +- .../analysis/AlterTableSetTblProperties.java | 27 --- .../apache/impala/analysis/BaseTableRef.java | 19 --- .../apache/impala/analysis/CreateTableStmt.java | 21 --- .../org/apache/impala/catalog/HdfsTable.java | 16 -- .../org/apache/impala/planner/HdfsScanNode.java | 5 +- .../apache/impala/service/BackendConfig.java | 3 - .../org/apache/impala/service/FeSupport.java | 14 -- .../apache/impala/analysis/AnalyzeDDLTest.java | 52 ------ tests/common/impala_test_suite.py | 23 --- .../test_hive_parquet_timestamp_conversion.py | 170 ++----------------- tests/metadata/test_ddl.py | 8 +- tests/metadata/test_ddl_base.py | 23 +++ .../test_parquet_timestamp_compatibility.py | 135 --------------- 28 files changed, 75 insertions(+), 850 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/benchmarks/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt index 1739375..e954583 100644 --- a/be/src/benchmarks/CMakeLists.txt +++ b/be/src/benchmarks/CMakeLists.txt @@ -56,6 +56,5 @@ ADD_BE_BENCHMARK(string-compare-benchmark) ADD_BE_BENCHMARK(string-search-benchmark) ADD_BE_BENCHMARK(thread-create-benchmark) ADD_BE_BENCHMARK(tuple-layout-benchmark) -ADD_BE_BENCHMARK(convert-timestamp-benchmark) target_link_libraries(hash-benchmark Experiments) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/benchmarks/convert-timestamp-benchmark.cc ---------------------------------------------------------------------- diff --git a/be/src/benchmarks/convert-timestamp-benchmark.cc b/be/src/benchmarks/convert-timestamp-benchmark.cc deleted file mode 100644 index 8a5cd3e..0000000 --- a/be/src/benchmarks/convert-timestamp-benchmark.cc +++ /dev/null @@ -1,163 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include <time.h> -#include <stdlib.h> -#include <stdio.h> -#include <iostream> -#include <vector> -#include <sstream> -#include <boost/date_time/posix_time/posix_time.hpp> -#include <boost/date_time/local_time/local_time.hpp> -#include <boost/thread/thread.hpp> -#include <boost/shared_ptr.hpp> - -#include "exprs/timezone_db.h" -#include "runtime/timestamp-parse-util.h" -#include "runtime/timestamp-value.h" -#include "util/benchmark.h" -#include "util/cpu-info.h" -#include "util/pretty-printer.h" -#include "util/stopwatch.h" - -#include "common/names.h" - -namespace gregorian = boost::gregorian; -using boost::posix_time::duration_from_string; -using boost::posix_time::hours; -using boost::posix_time::nanoseconds; -using boost::posix_time::ptime; -using boost::posix_time::time_duration; -using boost::posix_time::to_iso_extended_string; -using boost::posix_time::to_simple_string; -using boost::local_time::time_zone_ptr; -using boost::local_time::posix_time_zone; -using namespace impala; - -// Benchmark for converting timestamps from UTC to local time and from UTC to a given time -// zone. -// Machine Info: Intel(R) Core(TM) i5-6600 CPU @ 3.30GHz -// ConvertTimestamp: Function 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile -// (relative) (relative) (relative) -// ------------------------------------------------------------------------------------ -// FromUtc 0.0147 0.0152 0.0155 1X 1X 1X -// UtcToLocal 0.0216 0.0228 0.0234 1.47X 1.5X 1.51X - -time_zone_ptr LOCAL_TZ; - -struct TestData { - vector<TimestampValue> data; - vector<TimestampValue> result; -}; - -void AddTestDataDateTimes(TestData* data, int n, const string& startstr) { - DateTimeFormatContext dt_ctx; - dt_ctx.Reset("yyyy-MMM-dd HH:mm:ss", 19); - TimestampParser::ParseFormatTokens(&dt_ctx); - - ptime start(boost::posix_time::time_from_string(startstr)); - for (int i = 0; i < n; ++i) { - int val = rand(); - start += gregorian::date_duration(rand() % 100); - start += nanoseconds(val); - stringstream ss; - ss << to_simple_string(start); - string ts = ss.str(); - data->data.push_back(TimestampValue::Parse(ts.c_str(), ts.size(), dt_ctx)); - } -} - -void TestFromUtc(int batch_size, void* d) { - TestData* data = reinterpret_cast<TestData*>(d); - for (int i = 0; i < batch_size; ++i) { - int n = data->data.size(); - for (int j = 0; j < n; ++j) { - TimestampValue ts = data->data[j]; - ts.FromUtc(LOCAL_TZ); - data->result[j] = ts; - } - } -} - -void TestUtcToLocal(int batch_size, void* d) { - TestData* data = reinterpret_cast<TestData*>(d); - for (int i = 0; i < batch_size; ++i) { - int n = data->data.size(); - for (int j = 0; j < n; ++j) { - TimestampValue ts = data->data[j]; - ts.UtcToLocal(); - data->result[j] = ts; - } - } -} - -int main(int argc, char **argv) { - CpuInfo::Init(); - cout << Benchmark::GetMachineInfo() << endl; - - TimestampParser::Init(); - ABORT_IF_ERROR(TimezoneDatabase::Initialize()); - - // Initialize LOCAL_TZ to local time zone - tzset(); - time_t now = time(0); - LOCAL_TZ = time_zone_ptr(new posix_time_zone(tzname[localtime(&now)->tm_isdst])); - - TestData datetimes; - AddTestDataDateTimes(&datetimes, 10000, "1953-04-22 01:02:03"); - datetimes.result.resize(datetimes.data.size()); - - Benchmark timestamp_suite("ConvertTimestamp"); - timestamp_suite.AddBenchmark("FromUtc", TestFromUtc, &datetimes); - timestamp_suite.AddBenchmark("UtcToLocal", TestUtcToLocal, &datetimes); - - cout << timestamp_suite.Measure() << endl; - - // If number of threads is specified, run multithreaded tests. - int num_of_threads = (argc < 2) ? 0 : atoi(argv[1]); - if (num_of_threads >= 1) { - vector<boost::shared_ptr<boost::thread> > threads(num_of_threads); - StopWatch sw; - // Test UtcToLocal() - sw.Start(); - for (auto& t: threads) { - t = boost::shared_ptr<boost::thread>( - new boost::thread(TestUtcToLocal, 100, &datetimes)); - } - for (auto& t: threads) t->join(); - uint64_t utc_to_local_elapsed_time = sw.ElapsedTime(); - sw.Stop(); - - // Test FromUtc() - sw.Start(); - for (auto& t: threads) { - t = boost::shared_ptr<boost::thread>( - new boost::thread(TestFromUtc, 100, &datetimes)); - } - for (auto& t: threads) t->join(); - uint64_t from_utc_elapsed_time = sw.ElapsedTime(); - sw.Stop(); - - cout << "Number of threads: " << num_of_threads << endl - << "TestFromUtc: " - << PrettyPrinter::Print(from_utc_elapsed_time, TUnit::CPU_TICKS) << endl - << "TestUtcToLocal: " - << PrettyPrinter::Print(utc_to_local_elapsed_time, TUnit::CPU_TICKS) << endl; - } - - return 0; -} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/exec/hdfs-scan-node-base.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc index b6366a9..eee5fbc 100644 --- a/be/src/exec/hdfs-scan-node-base.cc +++ b/be/src/exec/hdfs-scan-node-base.cc @@ -88,8 +88,6 @@ HdfsScanNodeBase::HdfsScanNodeBase(ObjectPool* pool, const TPlanNode& tnode, min_max_tuple_desc_(nullptr), skip_header_line_count_(tnode.hdfs_scan_node.__isset.skip_header_line_count ? tnode.hdfs_scan_node.skip_header_line_count : 0), - parquet_mr_write_zone_(tnode.hdfs_scan_node.__isset.parquet_mr_write_zone ? - tnode.hdfs_scan_node.parquet_mr_write_zone : ""), tuple_id_(tnode.hdfs_scan_node.tuple_id), reader_context_(NULL), tuple_desc_(NULL), http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/exec/hdfs-scan-node-base.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hdfs-scan-node-base.h b/be/src/exec/hdfs-scan-node-base.h index 94e8952..02565ff 100644 --- a/be/src/exec/hdfs-scan-node-base.h +++ b/be/src/exec/hdfs-scan-node-base.h @@ -153,7 +153,6 @@ class HdfsScanNodeBase : public ScanNode { const AvroSchemaElement& avro_schema() { return *avro_schema_.get(); } RuntimeState* runtime_state() { return runtime_state_; } int skip_header_line_count() const { return skip_header_line_count_; } - const std::string& parquet_mr_write_zone() const { return parquet_mr_write_zone_; } DiskIoRequestContext* reader_context() { return reader_context_; } typedef std::map<TupleId, std::vector<ExprContext*>> ConjunctsMap; @@ -315,11 +314,6 @@ class HdfsScanNodeBase : public ScanNode { // to values > 0 for hdfs text files. const int skip_header_line_count_; - // Time zone for adjusting timestamp values read from Parquet files written by - // parquet-mr. If conversion should not occur, this is set to an empty string. Otherwise - // FE guarantees that this is a valid time zone. - const std::string parquet_mr_write_zone_; - /// Tuple id resolved in Prepare() to set tuple_desc_ const int tuple_id_; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/exec/parquet-column-readers.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/parquet-column-readers.cc b/be/src/exec/parquet-column-readers.cc index 8f703ec..f1bbf61 100644 --- a/be/src/exec/parquet-column-readers.cc +++ b/be/src/exec/parquet-column-readers.cc @@ -27,7 +27,6 @@ #include "exec/parquet-metadata-utils.h" #include "exec/parquet-scratch-tuple-batch.h" #include "exec/read-write-util.h" -#include "exprs/timezone_db.h" #include "rpc/thrift-util.h" #include "runtime/collection-value-builder.h" #include "runtime/tuple-row.h" @@ -212,9 +211,7 @@ class ScalarColumnReader : public BaseScalarColumnReader { const SlotDescriptor* slot_desc) : BaseScalarColumnReader(parent, node, slot_desc), dict_decoder_init_(false), - needs_conversion_(false), - timezone_(NULL), - is_timestamp_dependent_timezone_(false) { + needs_conversion_(false) { if (!MATERIALIZED) { // We're not materializing any values, just counting them. No need (or ability) to // initialize state used to materialize values. @@ -231,25 +228,12 @@ class ScalarColumnReader : public BaseScalarColumnReader { } else { fixed_len_size_ = -1; } - needs_conversion_ = slot_desc_->type().type == TYPE_CHAR || ( - slot_desc_->type().type == TYPE_TIMESTAMP && - parent->file_version_.application == "parquet-mr" && - parent_->scan_node_->parquet_mr_write_zone() != "UTC" && ( - !parent_->scan_node_->parquet_mr_write_zone().empty() || - FLAGS_convert_legacy_hive_parquet_utc_timestamps - ) - ); - - if (needs_conversion_ && slot_desc_->type().type == TYPE_TIMESTAMP && - !parent_->scan_node_->parquet_mr_write_zone().empty()) { - is_timestamp_dependent_timezone_ = TimezoneDatabase::IsTimestampDependentTimezone( - parent_->scan_node_->parquet_mr_write_zone()); - if (!is_timestamp_dependent_timezone_) { - timezone_ = TimezoneDatabase::FindTimezone( - parent_->scan_node_->parquet_mr_write_zone()); - } - DCHECK_EQ(is_timestamp_dependent_timezone_, (timezone_ == NULL)); - } + needs_conversion_ = slot_desc_->type().type == TYPE_CHAR || + // TODO: Add logic to detect file versions that have unconverted TIMESTAMP + // values. Currently all versions have converted values. + (FLAGS_convert_legacy_hive_parquet_utc_timestamps && + slot_desc_->type().type == TYPE_TIMESTAMP && + parent->file_version_.application == "parquet-mr"); } virtual ~ScalarColumnReader() { } @@ -557,16 +541,6 @@ class ScalarColumnReader : public BaseScalarColumnReader { /// true if decoded values must be converted before being written to an output tuple. bool needs_conversion_; - /// Used to cache the timezone object corresponding to the "parquet.mr.int96.write.zone" - /// table property to avoid repeated calls to TimezoneDatabase::FindTimezone(). Set to - /// NULL if the table property is not set, or if it is set to UTC or to a timestamp - /// dependent timezone. - boost::local_time::time_zone_ptr timezone_; - - /// true if "parquet.mr.int96.write.zone" table property is set to a timestamp dependent - /// timezone. - bool is_timestamp_dependent_timezone_; - /// The size of this column with plain encoding for FIXED_LEN_BYTE_ARRAY, or /// the max length for VARCHAR columns. Unused otherwise. int fixed_len_size_; @@ -610,63 +584,13 @@ inline bool ScalarColumnReader<TimestampValue, true>::NeedsConversionInline() co return needs_conversion_; } -/// Sets timestamp conversion error message in 'scanner_status'. Returns false if the -/// execution should be aborted, otherwise returns true. -bool __attribute__((noinline)) SetTimestampConversionError(HdfsScanNodeBase* scan_node, - RuntimeState* scanner_state, const TimestampValue* tv, const string& timezone, - const string& detail, Status* scanner_status) { - ErrorMsg msg(TErrorCode::PARQUET_MR_TIMESTAMP_CONVERSION_FAILED, tv->ToString(), - timezone, scan_node->hdfs_table()->fully_qualified_name()); - if (!detail.empty()) msg.AddDetail(detail); - Status status = scanner_state->LogOrReturnError(msg); - if (!status.ok()) { - *scanner_status = status; - return false; - } - return true; -} - template<> bool ScalarColumnReader<TimestampValue, true>::ConvertSlot( -const TimestampValue* src, TimestampValue* dst, MemPool* pool) { - // Conversion should only happen when "parquet.mr.int96.write.zone" table property is - // not set to "UTC" - DCHECK_NE(parent_->scan_node_->parquet_mr_write_zone(), "UTC"); - + const TimestampValue* src, TimestampValue* dst, MemPool* pool) { + // Conversion should only happen when this flag is enabled. + DCHECK(FLAGS_convert_legacy_hive_parquet_utc_timestamps); *dst = *src; - if (LIKELY(dst->HasDateAndTime())) { - if (LIKELY(timezone_ != NULL)) { - // Not a timestamp specific timezone. Convert timestamp to the timezone object - // cached in timezone_. - if (UNLIKELY(!dst->FromUtc(timezone_))) { - if (!SetTimestampConversionError(parent_->scan_node_, parent_->state_, - src, parent_->scan_node_->parquet_mr_write_zone(), "", - &parent_->parse_status_)) { - return false; - } - } - } else if (UNLIKELY(is_timestamp_dependent_timezone_)) { - // Timestamp specific timezone (such as Moscow pre 2011). - // Call timestamp conversion function with the timezone string. - if (UNLIKELY(!dst->FromUtc(parent_->scan_node_->parquet_mr_write_zone()))) { - if (!SetTimestampConversionError(parent_->scan_node_, parent_->state_, - src, parent_->scan_node_->parquet_mr_write_zone(), "", - &parent_->parse_status_)) { - return false; - } - } - } else { - DCHECK(parent_->scan_node_->parquet_mr_write_zone().empty()); - DCHECK(FLAGS_convert_legacy_hive_parquet_utc_timestamps); - Status s = dst->UtcToLocal(); - if (UNLIKELY(!s.ok())) { - if (!SetTimestampConversionError(parent_->scan_node_, parent_->state_, - src, "localtime", s.GetDetail(), &parent_->parse_status_)) { - return false; - } - } - } - } + if (dst->HasDateAndTime()) dst->UtcToLocal(); return true; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/exprs/timestamp-functions.cc ---------------------------------------------------------------------- diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc index c5eb4b1..93a16cd 100644 --- a/be/src/exprs/timestamp-functions.cc +++ b/be/src/exprs/timestamp-functions.cc @@ -197,32 +197,8 @@ void TimestampFunctions::UnixAndFromUnixClose(FunctionContext* context, } } -time_zone_ptr TimezoneDatabase::FindTimezone(const string& tz) { - if (tz.empty()) return NULL; - - // Look up time zone in 'tz_database' by region. - time_zone_ptr tzp = tz_database_.time_zone_from_region(tz); - if (tzp != NULL) return tzp; - - // Look up time zone in 'tz_database' by name variations. The following name variations - // are considered: - // - daylight savings abbreviation - // - standard abbreviation - // - daylight savings name - // - standard name - for (const string& tz_region: tz_region_list_) { - time_zone_ptr tzp = tz_database_.time_zone_from_region(tz_region); - DCHECK(tzp != NULL); - if (tzp->dst_zone_abbrev() == tz) return tzp; - if (tzp->std_zone_abbrev() == tz) return tzp; - if (tzp->dst_zone_name() == tz) return tzp; - if (tzp->std_zone_name() == tz) return tzp; - } - return NULL; -} - time_zone_ptr TimezoneDatabase::FindTimezone( - const string& tz, const TimestampValue& tv, bool tv_in_utc) { + const string& tz, const TimestampValue& tv, bool tv_in_utc) { // The backing database does not handle timezone rule changes. if (iequals("Europe/Moscow", tz) || iequals("Moscow", tz) || iequals("MSK", tz)) { if (tv.date().year() < 2011 || (tv.date().year() == 2011 && tv.date().month() < 4)) { @@ -253,11 +229,20 @@ time_zone_ptr TimezoneDatabase::FindTimezone( } } - return FindTimezone(tz); -} + // See if they specified a zone id + time_zone_ptr tzp = tz_database_.time_zone_from_region(tz); + if (tzp != NULL) return tzp; -bool TimezoneDatabase::IsTimestampDependentTimezone(const string& tz) { - return iequals("Europe/Moscow", tz) || iequals("Moscow", tz) || iequals("MSK", tz); + for (vector<string>::const_iterator iter = tz_region_list_.begin(); + iter != tz_region_list_.end(); ++iter) { + time_zone_ptr tzp = tz_database_.time_zone_from_region(*iter); + DCHECK(tzp != NULL); + if (tzp->dst_zone_abbrev() == tz) return tzp; + if (tzp->std_zone_abbrev() == tz) return tzp; + if (tzp->dst_zone_name() == tz) return tzp; + if (tzp->std_zone_name() == tz) return tzp; + } + return time_zone_ptr(); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/exprs/timezone_db.h ---------------------------------------------------------------------- diff --git a/be/src/exprs/timezone_db.h b/be/src/exprs/timezone_db.h index 9a9b14f..3a1178a 100644 --- a/be/src/exprs/timezone_db.h +++ b/be/src/exprs/timezone_db.h @@ -42,15 +42,6 @@ class TimezoneDatabase { static boost::local_time::time_zone_ptr FindTimezone(const std::string& tz, const TimestampValue& tv, bool tv_in_utc); - /// Converts the name of a timezone to a boost timezone object without taking into - /// account the timestamp. May not work correctly when IsTimestampDependentTimezone(tz) - /// is true, e.g. Moscow timezone. - /// If 'tz' is not found in the database, nullptr is returned. - static boost::local_time::time_zone_ptr FindTimezone(const std::string& tz); - - /// Returns true if 'tz' specifies a timezone that was changed in the past. - static bool IsTimestampDependentTimezone(const std::string& tz); - /// Moscow timezone UTC+3 with DST, for use before March 27, 2011. static const boost::local_time::time_zone_ptr TIMEZONE_MSK_PRE_2011_DST; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/runtime/timestamp-value.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/timestamp-value.cc b/be/src/runtime/timestamp-value.cc index b3acc56..be76db2 100644 --- a/be/src/runtime/timestamp-value.cc +++ b/be/src/runtime/timestamp-value.cc @@ -16,8 +16,6 @@ // under the License. #include "runtime/timestamp-value.h" -#include "exprs/timestamp-functions.h" -#include "exprs/timezone_db.h" #include <boost/date_time/posix_time/posix_time.hpp> @@ -28,8 +26,6 @@ using boost::date_time::not_a_date_time; using boost::gregorian::date; using boost::gregorian::date_duration; -using boost::local_time::local_date_time; -using boost::local_time::time_zone_ptr; using boost::posix_time::nanoseconds; using boost::posix_time::ptime; using boost::posix_time::ptime_from_tm; @@ -81,7 +77,7 @@ int TimestampValue::Format(const DateTimeFormatContext& dt_ctx, int len, char* b return TimestampParser::Format(dt_ctx, date_, time_, len, buff); } -Status TimestampValue::UtcToLocal() { +void TimestampValue::UtcToLocal() { DCHECK(HasDateAndTime()); // Previously, conversion was done using boost functions but it was found to be // too slow. Doing the conversion without function calls (which also avoids some @@ -96,7 +92,7 @@ Status TimestampValue::UtcToLocal() { tm temp; if (UNLIKELY(localtime_r(&utc, &temp) == nullptr)) { *this = ptime(not_a_date_time); - return Status("Failed to convert timestamp to local time."); + return; } // Unlikely but a time zone conversion may push the value over the min/max // boundary resulting in an exception. @@ -106,33 +102,9 @@ Status TimestampValue::UtcToLocal() { static_cast<unsigned short>(temp.tm_mday)); time_ = time_duration(temp.tm_hour, temp.tm_min, temp.tm_sec, time().fractional_seconds()); - } catch (std::exception& from_boost) { - Status s("Failed to convert timestamp to local time."); - s.AddDetail(from_boost.what()); + } catch (std::exception& /* from Boost */) { *this = ptime(not_a_date_time); - return s; } - return Status::OK(); -} - -bool TimestampValue::FromUtc(const std::string& timezone_str) { - DCHECK(HasDateAndTime()); - time_zone_ptr timezone = TimezoneDatabase::FindTimezone(timezone_str, *this, true); - if (UNLIKELY(timezone == nullptr)) { - *this = ptime(not_a_date_time); - return false; - } - return FromUtc(timezone); -} - -bool TimestampValue::FromUtc(time_zone_ptr timezone) { - DCHECK(HasDateAndTime()); - DCHECK(timezone != nullptr); - ptime temp; - ToPtime(&temp); - local_date_time lt(temp, timezone); - *this = lt.local_time(); - return true; } ostream& operator<<(ostream& os, const TimestampValue& timestamp_value) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/runtime/timestamp-value.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/timestamp-value.h b/be/src/runtime/timestamp-value.h index e7d4cfd..f0ba1f6 100644 --- a/be/src/runtime/timestamp-value.h +++ b/be/src/runtime/timestamp-value.h @@ -26,7 +26,6 @@ #include <gflags/gflags.h> #include <string> -#include "common/status.h" #include "udf/udf.h" #include "util/hash-util.h" @@ -209,16 +208,8 @@ class TimestampValue { bool ToSubsecondUnixTime(double* unix_time) const; /// Converts from UTC to local time in-place. The caller must ensure the TimestampValue - /// this function is called upon has both a valid date and time. Returns Status::OK() if - /// conversion was successfull and an error Status otherwise. If conversion failed *this - /// is set to a ptime object initialized to not_a_date_time. - Status UtcToLocal(); - - /// Converts from UTC to given timezone in-place. Returns true if conversion was - /// successfull and false otherwise. If conversion failed *this is set to a ptime object - /// initialized to not_a_date_time. - bool FromUtc(const std::string& timezone_str); - bool FromUtc(boost::local_time::time_zone_ptr timezone); + /// this function is called upon has both a valid date and time. + void UtcToLocal(); void set_date(const boost::gregorian::date d) { date_ = d; } void set_time(const boost::posix_time::time_duration t) { time_ = t; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/service/fe-support.cc ---------------------------------------------------------------------- diff --git a/be/src/service/fe-support.cc b/be/src/service/fe-support.cc index bb35089..d3681f9 100644 --- a/be/src/service/fe-support.cc +++ b/be/src/service/fe-support.cc @@ -28,8 +28,6 @@ #include "exec/catalog-op-executor.h" #include "exprs/expr-context.h" #include "exprs/expr.h" -#include "exprs/timestamp-functions.h" -#include "exprs/timezone_db.h" #include "gen-cpp/Data_types.h" #include "gen-cpp/Frontend_types.h" #include "rpc/jni-thrift-util.h" @@ -39,7 +37,6 @@ #include "runtime/hdfs-fs-cache.h" #include "runtime/lib-cache.h" #include "runtime/runtime-state.h" -#include "runtime/timestamp-value.h" #include "service/impala-server.h" #include "util/cpu-info.h" #include "util/debug-util.h" @@ -69,7 +66,6 @@ Java_org_apache_impala_service_FeSupport_NativeFeTestInit( // Init the JVM to load the classes in JniUtil that are needed for returning // exceptions to the FE. InitCommonRuntime(1, &name, true, TestInfo::FE_TEST); - ABORT_IF_ERROR(TimezoneDatabase::Initialize()); LlvmCodeGen::InitializeLlvm(true); ExecEnv* exec_env = new ExecEnv(); // This also caches it from the process. exec_env->InitForFeTests(); @@ -351,17 +347,6 @@ Java_org_apache_impala_service_FeSupport_NativePrioritizeLoad( return result_bytes; } -// Used to call native code from the FE to check if a timezone string is valid or not. -extern "C" -JNIEXPORT jboolean JNICALL -Java_org_apache_impala_service_FeSupport_NativeCheckIsValidTimeZone( - JNIEnv* env, jclass caller_class, jstring timezone) { - const char *tz = env->GetStringUTFChars(timezone, NULL); - jboolean tz_found = tz != NULL && TimezoneDatabase::FindTimezone(tz) != NULL; - env->ReleaseStringUTFChars(timezone, tz); - return tz_found; -} - namespace impala { static JNINativeMethod native_methods[] = { @@ -385,10 +370,6 @@ static JNINativeMethod native_methods[] = { (char*)"NativePrioritizeLoad", (char*)"([B)[B", (void*)::Java_org_apache_impala_service_FeSupport_NativePrioritizeLoad }, - { - (char*)"NativeCheckIsValidTimeZone", (char*)"(Ljava/lang/String;)Z", - (void*)::Java_org_apache_impala_service_FeSupport_NativeCheckIsValidTimeZone - } }; void InitFeSupport(bool disable_codegen) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/service/impala-server.cc ---------------------------------------------------------------------- diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc index 10099b1..da09eb1 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -121,10 +121,6 @@ DEFINE_string(default_query_options, "", "key=value pair of default query option DEFINE_int32(query_log_size, 25, "Number of queries to retain in the query log. If -1, " "the query log has unbounded size."); DEFINE_bool(log_query_to_file, true, "if true, logs completed query profiles to file."); -DEFINE_bool(set_parquet_mr_int96_write_zone_to_utc_on_new_tables, false, "if true, sets " - "the parquet.mr.int96.write.zone table property to UTC for new tables created using " - "CREATE TABLE and CREATE TABLE LIKE <file>. You can find details in the " - "documentation."); DEFINE_int64(max_result_cache_size, 100000L, "Maximum number of query results a client " "may request to be cached on a per-query basis to support restarting fetches. This " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/be/src/util/backend-gflag-util.cc ---------------------------------------------------------------------- diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc index 7b2cdd9..0781393 100644 --- a/be/src/util/backend-gflag-util.cc +++ b/be/src/util/backend-gflag-util.cc @@ -26,7 +26,6 @@ // Configs for the Frontend and the Catalog. DECLARE_bool(load_catalog_in_background); DECLARE_bool(load_auth_to_local_rules); -DECLARE_bool(set_parquet_mr_int96_write_zone_to_utc_on_new_tables); DECLARE_int32(non_impala_java_vlog); DECLARE_int32(read_size); DECLARE_int32(num_metadata_loading_threads); @@ -52,8 +51,6 @@ Status GetThriftBackendGflags(JNIEnv* jni_env, jbyteArray* cfg_bytes) { TBackendGflags cfg; cfg.__set_authorization_policy_file(FLAGS_authorization_policy_file); cfg.__set_load_catalog_in_background(FLAGS_load_catalog_in_background); - cfg.__set_set_parquet_mr_int96_write_zone_to_utc_on_new_tables( - FLAGS_set_parquet_mr_int96_write_zone_to_utc_on_new_tables); cfg.__set_server_name(FLAGS_server_name); cfg.__set_sentry_config(FLAGS_sentry_config); cfg.__set_authorization_policy_provider_class( http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/common/thrift/BackendGflags.thrift ---------------------------------------------------------------------- diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift index edf7422..09cf6f7 100644 --- a/common/thrift/BackendGflags.thrift +++ b/common/thrift/BackendGflags.thrift @@ -54,8 +54,4 @@ struct TBackendGflags { 16: required i32 kudu_operation_timeout_ms 17: required i32 initial_hms_cnxn_timeout_s - - // If true, new HDFS tables created using CREATE TABLE and CREATE TABLE LIKE <file> - // regardless of format will have the "parquet.mr.int96.write.zone" property set to UTC. - 18: required bool set_parquet_mr_int96_write_zone_to_utc_on_new_tables } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/common/thrift/PlanNodes.thrift ---------------------------------------------------------------------- diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift index e869afb..6299b9e 100644 --- a/common/thrift/PlanNodes.thrift +++ b/common/thrift/PlanNodes.thrift @@ -216,11 +216,6 @@ struct THdfsScanNode { // Map from SlotIds to the indices in TPlanNode.conjuncts that are eligible // for dictionary filtering. 9: optional map<Types.TSlotId, list<i32>> dictionary_filter_conjuncts - - // Specifies a time zone for adjusting timestamp values read from Parquet files written - // by parquet-mr. The actual value comes from "parquet.mr.int96.write.zone" table - // property. This is used for a Hive compatibilty fix. - 10: optional string parquet_mr_write_zone } struct TDataSourceScanNode { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/common/thrift/generate_error_codes.py ---------------------------------------------------------------------- diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py index 89abea4..8611295 100755 --- a/common/thrift/generate_error_codes.py +++ b/common/thrift/generate_error_codes.py @@ -316,10 +316,7 @@ error_codes = ( ("SCRATCH_READ_TRUNCATED", 102, "Error reading $0 bytes from scratch file '$1' at " "offset $2: could only read $3 bytes"), - ("PARQUET_MR_TIMESTAMP_CONVERSION_FAILED", 103, "Failed to convert timestamp '$0' to " - "timezone '$1' for a Parquet file in table '$2'."), - - ("KUDU_TIMESTAMP_OUT_OF_RANGE", 104, + ("KUDU_TIMESTAMP_OUT_OF_RANGE", 103, "Kudu table '$0' column '$1' contains an out of range timestamp. " "The valid date range is 1400-01-01..9999-12-31."), ) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java index b03d56d..2288663 100644 --- a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java +++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java @@ -30,7 +30,6 @@ import org.apache.impala.catalog.HdfsTable; import org.apache.impala.catalog.KuduTable; import org.apache.impala.catalog.Table; import org.apache.impala.common.AnalysisException; -import org.apache.impala.service.FeSupport; import org.apache.impala.thrift.TAlterTableParams; import org.apache.impala.thrift.TAlterTableSetTblPropertiesParams; import org.apache.impala.thrift.TAlterTableType; @@ -105,9 +104,6 @@ public class AlterTableSetTblProperties extends AlterTableSetStmt { // Analyze 'skip.header.line.format' property. analyzeSkipHeaderLineCount(getTargetTable(), tblProperties_); - // Analyze 'parquet.mr.int96.write.zone' - analyzeParquetMrWriteZone(getTargetTable(), tblProperties_); - // Analyze 'sort.columns' property. analyzeSortColumns(getTargetTable(), tblProperties_); } @@ -168,29 +164,6 @@ public class AlterTableSetTblProperties extends AlterTableSetStmt { } /** - * Analyze the 'parquet.mr.int96.write.zone' property to make sure it is set to a valid - * value. It is looked up in 'tblProperties', which must not be null. If 'table' is not - * null, then the method ensures that 'parquet.mr.int96.write.zone' is supported for its - * table type. If it is null, then this check is omitted. - */ - private static void analyzeParquetMrWriteZone(Table table, - Map<String, String> tblProperties) throws AnalysisException { - if (tblProperties.containsKey(HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE)) { - if (table != null && !(table instanceof HdfsTable)) { - throw new AnalysisException(String.format( - "Table property '%s' is only supported for HDFS tables.", - HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE)); - } - String timezone = tblProperties.get(HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE); - if (!FeSupport.CheckIsValidTimeZone(timezone)) { - throw new AnalysisException(String.format( - "Invalid time zone in the '%s' table property: %s", - HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE, timezone)); - } - } - } - - /** * Analyzes the 'sort.columns' property in 'tblProperties' against the columns of * 'table'. The property must store a list of column names separated by commas, and each * column in the property must occur in 'table' as a non-partitioning column. If there http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/main/java/org/apache/impala/analysis/BaseTableRef.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/BaseTableRef.java b/fe/src/main/java/org/apache/impala/analysis/BaseTableRef.java index ac97739..009851d 100644 --- a/fe/src/main/java/org/apache/impala/analysis/BaseTableRef.java +++ b/fe/src/main/java/org/apache/impala/analysis/BaseTableRef.java @@ -20,8 +20,6 @@ package org.apache.impala.analysis; import org.apache.impala.catalog.HdfsTable; import org.apache.impala.catalog.Table; import org.apache.impala.common.AnalysisException; -import org.apache.impala.service.FeSupport; - import com.google.common.base.Preconditions; /** @@ -67,7 +65,6 @@ public class BaseTableRef extends TableRef { analyzeHints(analyzer); analyzeJoin(analyzer); analyzeSkipHeaderLineCount(); - analyzeParquetMrWriteZone(); } @Override @@ -99,20 +96,4 @@ public class BaseTableRef extends TableRef { hdfsTable.parseSkipHeaderLineCount(error); if (error.length() > 0) throw new AnalysisException(error.toString()); } - - /** - * Analyze the 'parquet.mr.int96.write.zone' property. - */ - private void analyzeParquetMrWriteZone() throws AnalysisException { - Table table = getTable(); - if (!(table instanceof HdfsTable)) return; - HdfsTable hdfsTable = (HdfsTable)table; - - String timezone = hdfsTable.getParquetMrWriteZone(); - if (timezone != null && !FeSupport.CheckIsValidTimeZone(timezone)) { - throw new AnalysisException(String.format( - "Invalid time zone in the '%s' table property: %s", - HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE, timezone)); - } - } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java index 8d266ec..cba821e 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java @@ -23,13 +23,10 @@ import java.util.Map; import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; import org.apache.impala.authorization.PrivilegeRequestBuilder; -import org.apache.impala.catalog.HdfsTable; import org.apache.impala.catalog.KuduTable; import org.apache.impala.catalog.RowFormat; import org.apache.impala.common.AnalysisException; import org.apache.impala.common.ImpalaRuntimeException; -import org.apache.impala.service.BackendConfig; -import org.apache.impala.service.FeSupport; import org.apache.impala.thrift.TCreateTableParams; import org.apache.impala.thrift.THdfsFileFormat; import org.apache.impala.thrift.TTableName; @@ -181,24 +178,6 @@ public class CreateTableStmt extends StatementBase { } AvroSchemaUtils.setFromSerdeComment(getColumnDefs()); } - - if (getTblProperties().containsKey(HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE)) { - if (getFileFormat() == THdfsFileFormat.KUDU) { - throw new AnalysisException(String.format( - "Table property '%s' is only supported for HDFS tables.", - HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE)); - } - String timezone = getTblProperties().get(HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE); - if (!FeSupport.CheckIsValidTimeZone(timezone)) { - throw new AnalysisException(String.format( - "Invalid time zone in the '%s' table property: %s", - HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE, timezone)); - } - } else if (BackendConfig.INSTANCE.isSetParquetMrWriteZoneToUtcOnNewTables()) { - if (getFileFormat() != THdfsFileFormat.KUDU) { - getTblProperties().put(HdfsTable.TBL_PROP_PARQUET_MR_WRITE_ZONE, "UTC"); - } - } } /** http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index e387d37..dcf74ce 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -114,10 +114,6 @@ public class HdfsTable extends Table { // Table property key for skip.header.line.count public static final String TBL_PROP_SKIP_HEADER_LINE_COUNT = "skip.header.line.count"; - // Table property key for parquet.mr.int96.write.zone - public static final String TBL_PROP_PARQUET_MR_WRITE_ZONE = - "parquet.mr.int96.write.zone"; - // string to indicate NULL. set in load() from table properties private String nullColumnValue_; @@ -1281,18 +1277,6 @@ public class HdfsTable extends Table { } /** - * Returns the value of the 'parquet.mr.int96.write.zone' table property. If the value - * is not set for the table, returns null. - */ - public String getParquetMrWriteZone() { - org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(); - if (msTbl == null) return null; - Map<String, String> tblProperties = msTbl.getParameters(); - if (tblProperties == null) return null; - return tblProperties.get(TBL_PROP_PARQUET_MR_WRITE_ZONE); - } - - /** * Sets avroSchema_ if the table or any of the partitions in the table are stored * as Avro. Additionally, this method also reconciles the schema if the column * definitions from the metastore differ from the Avro schema. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index a3286b4..361a4d2 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -155,6 +155,7 @@ public class HdfsScanNode extends ScanNode { private static final Configuration CONF = new Configuration(); + // List of conjuncts for min/max values of parquet::Statistics, that are used to skip // data when scanning Parquet files. private List<Expr> minMaxConjuncts_ = Lists.newArrayList(); @@ -767,10 +768,6 @@ public class HdfsScanNode extends ScanNode { if (skipHeaderLineCount_ > 0) { msg.hdfs_scan_node.setSkip_header_line_count(skipHeaderLineCount_); } - String parquetMrWriteZone = tbl_.getParquetMrWriteZone(); - if (parquetMrWriteZone != null) { - msg.hdfs_scan_node.setParquet_mr_write_zone(parquetMrWriteZone); - } msg.hdfs_scan_node.setUse_mt_scan_node(useMtScanNode_); if (!minMaxConjuncts_.isEmpty()) { for (Expr e: minMaxConjuncts_) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/main/java/org/apache/impala/service/BackendConfig.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java index 04de238..5a4a440 100644 --- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java +++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java @@ -54,9 +54,6 @@ public class BackendConfig { !Strings.isNullOrEmpty(backendCfg_.principal); } public int getKuduClientTimeoutMs() { return backendCfg_.kudu_operation_timeout_ms; } - public boolean isSetParquetMrWriteZoneToUtcOnNewTables() { - return backendCfg_.set_parquet_mr_int96_write_zone_to_utc_on_new_tables; - } public int getImpalaLogLevel() { return backendCfg_.impala_log_lvl; } public int getNonImpalaJavaVlogLevel() { return backendCfg_.non_impala_java_vlog; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/main/java/org/apache/impala/service/FeSupport.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/FeSupport.java b/fe/src/main/java/org/apache/impala/service/FeSupport.java index f712752..8b87962 100644 --- a/fe/src/main/java/org/apache/impala/service/FeSupport.java +++ b/fe/src/main/java/org/apache/impala/service/FeSupport.java @@ -84,10 +84,6 @@ public class FeSupport { // using Java Thrift bindings. public native static byte[] NativePrioritizeLoad(byte[] thriftReq); - // Returns true if timezone String is valid according to the BE timezone database, false - // otherwise. - public native static boolean NativeCheckIsValidTimeZone(String timezone); - /** * Locally caches the jar at the specified HDFS location. * @@ -265,16 +261,6 @@ public class FeSupport { } } - public static boolean CheckIsValidTimeZone(String timezone) { - if (timezone == null) return false; - try { - return NativeCheckIsValidTimeZone(timezone); - } catch (UnsatisfiedLinkError e) { - loadLibrary(); - } - return NativeCheckIsValidTimeZone(timezone); - } - /** * This function should only be called explicitly by the FeSupport to ensure that * native functions are loaded. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index 0fe60a3..1a8cba5 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -677,58 +677,6 @@ public class AnalyzeDDLTest extends FrontendTestBase { } @Test - public void TestParquetMrInt96WriteZone() { - // Attempt to set 'parquet.mr.int96.write.zone' when creating a table. Positive cases. - AnalyzesOk("create table tbl (i int) tblproperties " + - "('parquet.mr.int96.write.zone'='EST')"); - AnalyzesOk("create table tbl tblproperties " + - "('parquet.mr.int96.write.zone'='EST') " + - "as select * from functional.alltypesnopart"); - AnalyzesOk("create external table tbl like parquet " + - "'/test-warehouse/alltypesagg_hive_13_1_parquet/" + - "alltypesagg_hive_13_1.parquet' " + - "stored as parquet " + - "tblproperties ('parquet.mr.int96.write.zone'='EST')"); - // Cannot set 'parquet.mr.int96.write.zone' table property when creating a non-HDFS - // table. - AnalysisError("create external table tbl stored as kudu tblproperties (" + - "'kudu.table_name'='tab'," + - "'kudu.master_addresses' = '127.0.0.1:8080, 127.0.0.1:8081'," + - "'parquet.mr.int96.write.zone'='EST')", - "Table property 'parquet.mr.int96.write.zone' is only supported for HDFS " + - "tables."); - // Cannot set 'parquet.mr.int96.write.zone' table property to an invalid time zone - // when creating a table. - AnalysisError("create table tbl (i int) tblproperties" + - "('parquet.mr.int96.write.zone'='garbage')", - "Invalid time zone in the 'parquet.mr.int96.write.zone' table property: garbage"); - AnalysisError("create table tbl tblproperties " + - "('parquet.mr.int96.write.zone'='garbage') " + - "as select * from functional.alltypesnopart", - "Invalid time zone in the 'parquet.mr.int96.write.zone' table property: garbage"); - AnalysisError("create external table tbl like parquet " + - "'/test-warehouse/alltypesagg_hive_13_1_parquet/" + - "alltypesagg_hive_13_1.parquet' " + - "stored as parquet " + - "tblproperties ('parquet.mr.int96.write.zone'='garbage')", - "Invalid time zone in the 'parquet.mr.int96.write.zone' table property: garbage"); - - // Attempt to set 'parquet.mr.int96.write.zone' table property. Positive case. - AnalyzesOk("alter table functional.alltypes set tblproperties" + - "('parquet.mr.int96.write.zone'='EST')"); - // Cannot set 'parquet.mr.int96.write.zone' table property on a table not backed by - // HDFS. - AnalysisError("alter table functional_kudu.alltypes set tblproperties" + - "('parquet.mr.int96.write.zone'='EST')", - "Table property 'parquet.mr.int96.write.zone' is only supported for HDFS " + - "tables."); - // Cannot set 'parquet.mr.int96.write.zone' table property to an invalid time zone. - AnalysisError("alter table functional.alltypes set tblproperties" + - "('parquet.mr.int96.write.zone'='garbage')", - "Invalid time zone in the 'parquet.mr.int96.write.zone' table property: garbage"); - } - - @Test public void TestAlterTableSetCached() { // Positive cases AnalyzesOk("alter table functional.alltypesnopart set cached in 'testPool'"); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/tests/common/impala_test_suite.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index f7602af..7cbacd3 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -507,29 +507,6 @@ class ImpalaTestSuite(BaseTestSuite): assert not result.success, "No failure encountered for query %s" % query return result - def _get_properties(self, section_name, table_name): - """Extracts the table properties mapping from the output of DESCRIBE FORMATTED""" - result = self.client.execute("describe formatted " + table_name) - match = False - properties = dict(); - for row in result.data: - if section_name in row: - match = True - elif match: - row = row.split('\t') - if (row[1] == 'NULL'): - break - properties[row[1].rstrip()] = row[2].rstrip() - return properties - - def get_table_properties(self, table_name): - """Extracts the table properties mapping from the output of DESCRIBE FORMATTED""" - return self._get_properties('Table Parameters:', table_name) - - def get_serde_properties(self, table_name): - """Extracts the serde properties mapping from the output of DESCRIBE FORMATTED""" - return self._get_properties('Storage Desc Params:', table_name) - @execute_wrapper def execute_query(self, query, query_options=None): return self.__execute_query(self.client, query, query_options) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/tests/custom_cluster/test_hive_parquet_timestamp_conversion.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_hive_parquet_timestamp_conversion.py b/tests/custom_cluster/test_hive_parquet_timestamp_conversion.py index fd631fd..4d7c202 100644 --- a/tests/custom_cluster/test_hive_parquet_timestamp_conversion.py +++ b/tests/custom_cluster/test_hive_parquet_timestamp_conversion.py @@ -21,41 +21,12 @@ import pytest import time from tests.common.custom_cluster_test_suite import CustomClusterTestSuite -from tests.util.filesystem_utils import get_fs_path class TestHiveParquetTimestampConversion(CustomClusterTestSuite): - '''Hive writes timestamps in Parquet files by first converting values from local time + '''Hive writes timestamps in parquet files by first converting values from local time to UTC. The conversion was not expected (other file formats don't convert) and a - startup flag (-convert_legacy_hive_parquet_utc_timestamps) was later added to adjust - for this (IMPALA-1658). IMPALA-2716 solves the issue in a more general way by - introducing a table property ('parquet.mr.int96.write.zone') that specifies the time - zone to convert the timestamp values to. - - This file tests that the table property and the startup option behave as expected in - the following scenarios: - 1. If the 'parquet.mr.int96.write.zone' table property is set, Impala ignores the - -convert_legacy_hive_parquet_utc_timestamps startup option. It reads Parquet - timestamp data written by Hive and adjusts values using the time zone from the - table property. - 2. If the 'parquet.mr.int96.write.zone' table property is not set, the - -convert_legacy_hive_parquet_utc_timestamps startup option is taken into account. - a. If the startup option is set to true, Impala reads Parquet timestamp data - created by Hive and adjusts values using the local time zone. - b. If the startup option is absent or set to false, no adjustment will be applied - to timestamp values. - - IMPALA-2716 also introduces a startup option - (-set_parquet_mr_int96_write_zone_to_utc_on_new_tables) that determines if the table - property will be set on newly created tables. This file tests the basic behavior of the - startup option: - 1. Tables created with the 'parquet.mr.int96.write.zone' table property explicitly - set, will keep the value the property is set to. - 2. If -set_parquet_mr_int96_write_zone_to_utc_on_new_tables is set to true, tables - created using CREATE TABLE, CREATE TABLE AS SELECT and CREATE TABLE LIKE <FILE> - will set the table property to UTC. - 3. Tables created using CREATE TABLE LIKE <OTHER TABLE> will ignore the value of - -set_parquet_mr_int96_write_zone_to_utc_on_new_tables and copy the property of - the table that is copied. + startup flag was later added to adjust for this (IMPALA-1658). This file tests that + the conversion and flag behave as expected. ''' @classmethod @@ -65,12 +36,11 @@ class TestHiveParquetTimestampConversion(CustomClusterTestSuite): v.get_value('table_format').file_format == 'parquet' and v.get_value('table_format').compression_codec == 'none') - def check_sanity(self, expect_converted_result, - tbl_name='functional_parquet.alltypesagg_hive_13_1'): + def check_sanity(self, expect_converted_result): data = self.execute_query_expect_success(self.client, """ SELECT COUNT(timestamp_col), COUNT(DISTINCT timestamp_col), MIN(timestamp_col), MAX(timestamp_col) - FROM {0}""".format(tbl_name))\ + FROM functional_parquet.alltypesagg_hive_13_1""")\ .get_data() assert len(data) > 0 rows = data.split("\n") @@ -88,63 +58,22 @@ class TestHiveParquetTimestampConversion(CustomClusterTestSuite): assert values[2] == "2010-01-01 00:00:00" assert values[3] == "2010-01-10 18:02:05.100000000" - def get_parquet_mr_write_zone_tbl_prop(self, - tbl_name='functional_parquet.alltypesagg_hive_13_1'): - tbl_prop = self.get_table_properties(tbl_name) - if 'parquet.mr.int96.write.zone' not in tbl_prop: - return None - return tbl_prop['parquet.mr.int96.write.zone'] - @pytest.mark.execute_serially @CustomClusterTestSuite.with_args("-convert_legacy_hive_parquet_utc_timestamps=true") - def test_conversion_to_tbl_prop_timezone(self, vector, unique_database): - # Create table with 'parquet.mr.int96.write.zone' property set to China Standard Time. - # The undelying parquet file has been written by Hive. - hive_tbl = '%s.hive_tbl' % unique_database - parquet_loc = get_fs_path('/test-warehouse/alltypesagg_hive_13_1_parquet') - parquet_path = get_fs_path( - '/test-warehouse/alltypesagg_hive_13_1_parquet/alltypesagg_hive_13_1.parquet') - self.client.execute('''CREATE EXTERNAL TABLE {0} LIKE PARQUET "{1}" - STORED AS PARQUET LOCATION "{2}" - TBLPROPERTIES ('parquet.mr.int96.write.zone'='China Standard Time') - '''.format(hive_tbl, parquet_path, parquet_loc)) - # Make sure that the table property has been properly set. - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=hive_tbl) ==\ - 'China Standard Time' - # Even though -convert_legacy_hive_parquet_utc_timestamps is set to true it is ignored - # because the 'parquet.mr.int06.write.zone' table property is also set. The value read - # from the Hive table should be the same as the corresponding Impala timestamp value - # converted from UTC to China Standard Time. - self.check_sanity(True, tbl_name=hive_tbl) - data = self.execute_query_expect_success(self.client, """ - SELECT h.id, h.day, h.timestamp_col, i.timestamp_col - FROM {0} h - JOIN functional_parquet.alltypesagg i - ON i.id = h.id AND i.day = h.day -- serves as a unique key - WHERE - (h.timestamp_col IS NULL) != (i.timestamp_col IS NULL) - OR h.timestamp_col != FROM_UTC_TIMESTAMP(i.timestamp_col, 'China Standard Time') - """.format(hive_tbl))\ - .get_data() - assert len(data) == 0 - - @pytest.mark.execute_serially - @CustomClusterTestSuite.with_args("-convert_legacy_hive_parquet_utc_timestamps=true") - def test_conversion_to_localtime(self, vector): + def test_conversion(self, vector): tz_name = time.tzname[time.localtime().tm_isdst] self.check_sanity(tz_name not in ("UTC", "GMT")) - # Make sure that the table property is not set - assert self.get_parquet_mr_write_zone_tbl_prop() == None # The value read from the Hive table should be the same as reading a UTC converted # value from the Impala table. tz_name = time.tzname[time.localtime().tm_isdst] data = self.execute_query_expect_success(self.client, """ SELECT h.id, h.day, h.timestamp_col, i.timestamp_col FROM functional_parquet.alltypesagg_hive_13_1 h - JOIN functional_parquet.alltypesagg i - ON i.id = h.id AND i.day = h.day -- serves as a unique key + JOIN functional_parquet.alltypesagg + i ON i.id = h.id AND i.day = h.day -- serves as a unique key WHERE - (h.timestamp_col IS NULL) != (i.timestamp_col IS NULL) + (h.timestamp_col IS NULL AND i.timestamp_col IS NOT NULL) + OR (h.timestamp_col IS NOT NULL AND i.timestamp_col IS NULL) OR h.timestamp_col != FROM_UTC_TIMESTAMP(i.timestamp_col, '%s') """ % tz_name)\ .get_data() @@ -154,15 +83,13 @@ class TestHiveParquetTimestampConversion(CustomClusterTestSuite): @CustomClusterTestSuite.with_args("-convert_legacy_hive_parquet_utc_timestamps=false") def test_no_conversion(self, vector): self.check_sanity(False) - # Make sure that the table property is not set - assert self.get_parquet_mr_write_zone_tbl_prop() == None # Without conversion all the values will be different. tz_name = time.tzname[time.localtime().tm_isdst] data = self.execute_query_expect_success(self.client, """ SELECT h.id, h.day, h.timestamp_col, i.timestamp_col FROM functional_parquet.alltypesagg_hive_13_1 h - JOIN functional_parquet.alltypesagg i - ON i.id = h.id AND i.day = h.day -- serves as a unique key + JOIN functional_parquet.alltypesagg + i ON i.id = h.id AND i.day = h.day -- serves as a unique key WHERE h.timestamp_col != FROM_UTC_TIMESTAMP(i.timestamp_col, '%s') """ % tz_name)\ .get_data() @@ -174,76 +101,11 @@ class TestHiveParquetTimestampConversion(CustomClusterTestSuite): data = self.execute_query_expect_success(self.client, """ SELECT h.id, h.day, h.timestamp_col, i.timestamp_col FROM functional_parquet.alltypesagg_hive_13_1 h - JOIN functional_parquet.alltypesagg i - ON i.id = h.id AND i.day = h.day -- serves as a unique key + JOIN functional_parquet.alltypesagg + i ON i.id = h.id AND i.day = h.day -- serves as a unique key WHERE - (h.timestamp_col IS NULL) != (i.timestamp_col IS NULL) + (h.timestamp_col IS NULL AND i.timestamp_col IS NOT NULL) + OR (h.timestamp_col IS NOT NULL AND i.timestamp_col IS NULL) """)\ .get_data() assert len(data) == 0 - - @pytest.mark.execute_serially - @CustomClusterTestSuite.with_args( - "-set_parquet_mr_int96_write_zone_to_utc_on_new_tables=true") - def test_new_table_enable_set_tbl_prop_to_utc(self, unique_database): - # Table created with CREATE TABLE will set the table property to UTC. - tbl1_name = '%s.table1' % unique_database - self.client.execute('CREATE TABLE {0} (id int)'.format(tbl1_name)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl1_name) == 'UTC' - # Table created with CREATE TABLE will honor the explicitly set property. - tbl_est_name = '%s.table_est' % unique_database - self.client.execute('''CREATE TABLE {0} (id int) - TBLPROPERTIES ('parquet.mr.int96.write.zone'='EST') - '''.format(tbl_est_name)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl_est_name) == 'EST' - # Table created with CREATE TABLE AS SELECT will set the table property to UTC. Table - # property is not copied from the other table. - tbl2_name = '%s.table2' % unique_database - self.client.execute('CREATE TABLE {0} AS SELECT * FROM {1}'.format( - tbl2_name, tbl_est_name)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl2_name) == 'UTC' - # Table created with CREATE TABLE LIKE <FILE> will set the table property to UTC. - tbl3_name = '%s.tbl3_name' % unique_database - parquet_path = get_fs_path( - '/test-warehouse/alltypesagg_hive_13_1_parquet/alltypesagg_hive_13_1.parquet') - self.client.execute('CREATE EXTERNAL TABLE {0} LIKE PARQUET "{1}"'.format( - tbl3_name, parquet_path)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl3_name) == 'UTC' - # Table created with CREATE TABLE LIKE <OTHER TABLE> will copy the property from the - # other table. - tbl4_name = '%s.tbl4_name' % unique_database - self.client.execute('CREATE TABLE {0} LIKE {1}'.format(tbl4_name, tbl_est_name)); - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl4_name) == 'EST' - - @pytest.mark.execute_serially - @CustomClusterTestSuite.with_args( - "-set_parquet_mr_int96_write_zone_to_utc_on_new_tables=false") - def test_new_table_disable_set_tbl_prop_to_utc(self, unique_database): - # Table created with CREATE TABLE will not set the table property. - tbl1_name = '%s.table1' % unique_database - self.client.execute('CREATE TABLE {0} (id int)'.format(tbl1_name)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl1_name) == None - # Table created with CREATE TABLE will honor the explicitly set property. - tbl_est_name = '%s.table_est' % unique_database - self.client.execute('''CREATE TABLE {0} (id int) - TBLPROPERTIES ('parquet.mr.int96.write.zone'='EST') - '''.format(tbl_est_name)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl_est_name) == 'EST' - # Table created with CREATE TABLE AS SELECT will not set the table property. Table - # property is not copied from the other table. - tbl2_name = '%s.table2' % unique_database - self.client.execute('CREATE TABLE {0} AS SELECT * FROM {1}'.format( - tbl2_name, tbl_est_name)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl2_name) == None - # Table created with CREATE TABLE LIKE <FILE> will not set the table property. - tbl3_name = '%s.tbl3_name' % unique_database - parquet_path = get_fs_path( - '/test-warehouse/alltypesagg_hive_13_1_parquet/alltypesagg_hive_13_1.parquet') - self.client.execute('CREATE EXTERNAL TABLE {0} LIKE PARQUET "{1}"'.format( - tbl3_name, parquet_path)) - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl3_name) == None - # Table created with CREATE TABLE LIKE <OTHER TABLE> will copy the property from the - # other table. - tbl4_name = '%s.tbl4_name' % unique_database - self.client.execute('CREATE TABLE {0} LIKE {1}'.format(tbl4_name, tbl_est_name)); - assert self.get_parquet_mr_write_zone_tbl_prop(tbl_name=tbl4_name) == 'EST' http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/tests/metadata/test_ddl.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index 2dc950b..37ca7cd 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -368,7 +368,7 @@ class TestDdlStatements(TestDdlBase): self.client.execute("""create table {0} (i int) with serdeproperties ('s1'='s2', 's3'='s4') tblproperties ('p1'='v0', 'p1'='v1')""".format(fq_tbl_name)) - properties = self.get_table_properties(fq_tbl_name) + properties = self._get_tbl_properties(fq_tbl_name) assert len(properties) == 2 # The transient_lastDdlTime is variable, so don't verify the value. @@ -376,19 +376,19 @@ class TestDdlStatements(TestDdlBase): del properties['transient_lastDdlTime'] assert {'p1': 'v1'} == properties - properties = self.get_serde_properties(fq_tbl_name) + properties = self._get_serde_properties(fq_tbl_name) assert {'s1': 's2', 's3': 's4'} == properties # Modify the SERDEPROPERTIES using ALTER TABLE SET. self.client.execute("alter table {0} set serdeproperties " "('s1'='new', 's5'='s6')".format(fq_tbl_name)) - properties = self.get_serde_properties(fq_tbl_name) + properties = self._get_serde_properties(fq_tbl_name) assert {'s1': 'new', 's3': 's4', 's5': 's6'} == properties # Modify the TBLPROPERTIES using ALTER TABLE SET. self.client.execute("alter table {0} set tblproperties " "('prop1'='val1', 'p2'='val2', 'p2'='val3', ''='')".format(fq_tbl_name)) - properties = self.get_table_properties(fq_tbl_name) + properties = self._get_tbl_properties(fq_tbl_name) assert 'transient_lastDdlTime' in properties assert properties['p1'] == 'v1' http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/tests/metadata/test_ddl_base.py ---------------------------------------------------------------------- diff --git a/tests/metadata/test_ddl_base.py b/tests/metadata/test_ddl_base.py index acda99f..3044ef0 100644 --- a/tests/metadata/test_ddl_base.py +++ b/tests/metadata/test_ddl_base.py @@ -64,3 +64,26 @@ class TestDdlBase(ImpalaTestSuite): db_name, comment, WAREHOUSE) impala_client.execute(ddl) impala_client.close() + + def _get_tbl_properties(self, table_name): + """Extracts the table properties mapping from the output of DESCRIBE FORMATTED""" + return self._get_properties('Table Parameters:', table_name) + + def _get_serde_properties(self, table_name): + """Extracts the serde properties mapping from the output of DESCRIBE FORMATTED""" + return self._get_properties('Storage Desc Params:', table_name) + + def _get_properties(self, section_name, table_name): + """Extracts the table properties mapping from the output of DESCRIBE FORMATTED""" + result = self.client.execute("describe formatted " + table_name) + match = False + properties = dict(); + for row in result.data: + if section_name in row: + match = True + elif match: + row = row.split('\t') + if (row[1] == 'NULL'): + break + properties[row[1].rstrip()] = row[2].rstrip() + return properties http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/21f90633/tests/query_test/test_parquet_timestamp_compatibility.py ---------------------------------------------------------------------- diff --git a/tests/query_test/test_parquet_timestamp_compatibility.py b/tests/query_test/test_parquet_timestamp_compatibility.py deleted file mode 100644 index 37e6398..0000000 --- a/tests/query_test/test_parquet_timestamp_compatibility.py +++ /dev/null @@ -1,135 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -import pytest -import time - -from tests.beeswax.impala_beeswax import ImpalaBeeswaxException -from tests.common.impala_test_suite import ImpalaTestSuite -from tests.util.filesystem_utils import WAREHOUSE, get_fs_path - -class TestParquetTimestampCompatibility(ImpalaTestSuite): - '''Hive adjusts timestamps by subtracting the local time zone's offset from all values - when writing data to Parquet files. As a result of this adjustment, Impala may read - "incorrect" timestamp values from Parquet files written by Hive. To fix the problem - a table property ('parquet.mr.int96.write.zone') was introduced in IMPALA-2716 that - specifies the time zone to convert the timesamp values to. - - This file tests the following scenarios: - 1. If the 'parquet.mr.int96.write.zone' table property is set to an invalid time zone - (by Hive), Impala throws an error when analyzing a query against the table. - 2. If the 'parquet.mr.int96.write.zone' table property is set to a valid time zone: - a. Impala adjusts timestamp values read from Parquet files created by Hive using - the time zone from the table property. - b. Impala does not adjust timestamp values read from Parquet files created by - Impala. - ''' - - @classmethod - def get_workload(cls): - return 'functional-query' - - @classmethod - def add_test_dimensions(cls): - super(TestParquetTimestampCompatibility, cls).add_test_dimensions() - cls.ImpalaTestMatrix.add_constraint(lambda v: - v.get_value('table_format').file_format == 'parquet' and - v.get_value('table_format').compression_codec == 'none') - - def _setup_env(self, hive_tbl_name, impala_tbl_name=None): - parquet_loc = get_fs_path('/test-warehouse/alltypesagg_hive_13_1_parquet') - parquet_fn = get_fs_path( - '/test-warehouse/alltypesagg_hive_13_1_parquet/alltypesagg_hive_13_1.parquet') - self.client.execute('''CREATE EXTERNAL TABLE {0} - LIKE PARQUET "{1}" - STORED AS PARQUET LOCATION "{2}" - '''.format(hive_tbl_name, parquet_fn, parquet_loc)) - if impala_tbl_name: - self.client.execute('''CREATE TABLE {0} - STORED AS PARQUET AS - SELECT * FROM {1} - '''.format(impala_tbl_name, hive_tbl_name)) - - def _set_tbl_timezone(self, tbl_name, tz_name): - self.client.execute('''ALTER TABLE {0} - SET TBLPROPERTIES ('parquet.mr.int96.write.zone'='{1}') - '''.format(tbl_name, tz_name)) - - def _get_parquet_mr_write_zone_tbl_prop(self, tbl_name): - tbl_prop = self.get_table_properties(tbl_name) - if 'parquet.mr.int96.write.zone' not in tbl_prop: - return None - return tbl_prop['parquet.mr.int96.write.zone'] - - def test_invalid_parquet_mr_write_zone(self, vector, unique_database): - # Hive doesn't allow setting 'parquet.mr.int96.write.zone' table property to an - # invalid time zone anymore. - pytest.skip() - - hive_tbl_name = '%s.hive_table' % unique_database - self._setup_env(hive_tbl_name) - # Hive sets the table property to an invalid time zone - self.run_stmt_in_hive('''ALTER TABLE {0} - SET TBLPROPERTIES ('parquet.mr.int96.write.zone'='garbage') - '''.format(hive_tbl_name)) - self.client.execute('REFRESH %s' % hive_tbl_name) - # Impala throws an error when the table is queried - try: - self.client.execute('SELECT timestamp_col FROM %s' % hive_tbl_name) - except ImpalaBeeswaxException, e: - if "Invalid time zone" not in str(e): - raise e - else: - assert False, "Query was expected to fail" - - def test_parquet_timestamp_conversion(self, vector, unique_database): - hive_tbl_name = '%s.hive_table' % unique_database - impala_tbl_name = '%s.impala_table' % unique_database - self._setup_env(hive_tbl_name, impala_tbl_name) - for tz_name in ['UTC', 'EST', 'China Standard Time', 'CET']: - # impala_table's underlying Parquet file was written by Impala. No conversion is - # performed on the timestamp values, no matter what value - # 'parquet.mr.int96.write.zone' is set to. - self._set_tbl_timezone(impala_tbl_name, tz_name) - data = self.execute_query_expect_success(self.client, """ - SELECT i2.id, i2.day, i2.timestamp_col, i1.timestamp_col - FROM functional.alltypesagg i1 - JOIN {0} i2 - ON i1.id = i2.id AND i1.day = i2.day -- serves as a unique key - WHERE - (i1.timestamp_col IS NULL) != (i2.timestamp_col IS NULL) - OR i1.timestamp_col != i2.timestamp_col - """.format(impala_tbl_name))\ - .get_data() - assert len(data) == 0 - assert self._get_parquet_mr_write_zone_tbl_prop(impala_tbl_name) == tz_name - # hive_table's underlying Parquet file was written by Hive. Setting the - # 'parquet.mr.int96.write.zone' table property to tz_name triggers a 'UTC' -> - # tz_name conversion on the timestamp values. - self._set_tbl_timezone(hive_tbl_name, tz_name) - data = self.execute_query_expect_success(self.client, """ - SELECT h.id, h.day, h.timestamp_col, i.timestamp_col - FROM functional.alltypesagg i - JOIN {0} h - ON i.id = h.id AND i.day = h.day -- serves as a unique key - WHERE - (h.timestamp_col IS NULL) != (i.timestamp_col IS NULL) - OR h.timestamp_col != FROM_UTC_TIMESTAMP(i.timestamp_col, '{1}') - """.format(hive_tbl_name, tz_name))\ - .get_data() - assert len(data) == 0 - assert self._get_parquet_mr_write_zone_tbl_prop(hive_tbl_name) == tz_name
