Repository: orc Updated Branches: refs/heads/orc-10 [created] 6dce746c3
ORC-10. Correct bug when moving ORC files between timezones. Project: http://git-wip-us.apache.org/repos/asf/orc/repo Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/6dce746c Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/6dce746c Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/6dce746c Branch: refs/heads/orc-10 Commit: 6dce746c30098a7b708dfa0aa5010260fd505ca6 Parents: ed772e7 Author: Owen O'Malley <[email protected]> Authored: Thu Feb 11 15:21:03 2016 -0800 Committer: Owen O'Malley <[email protected]> Committed: Mon Feb 22 22:06:37 2016 -0800 ---------------------------------------------------------------------- README.md | 1 - c++/src/Adaptor.hh.in | 8 + c++/src/CMakeLists.txt | 4 + c++/src/ColumnPrinter.cc | 2 +- c++/src/ColumnReader.cc | 10 +- c++/src/ColumnReader.hh | 7 +- c++/src/Reader.cc | 38 +- c++/src/Timezone.cc | 964 +++++++++++++++++++++++++++++++++++++ c++/src/Timezone.hh | 121 +++++ c++/test/CMakeLists.txt | 1 + c++/test/TestColumnPrinter.cc | 24 +- c++/test/TestColumnReader.cc | 33 +- c++/test/TestTimezone.cc | 361 ++++++++++++++ docker/run-all.sh | 7 +- tools/src/CMakeLists.txt | 2 +- 15 files changed, 1526 insertions(+), 57 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/README.md ---------------------------------------------------------------------- diff --git a/README.md b/README.md index 723e835..7711d4e 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,6 @@ _Optimized Row Columnar_ (ORC) file format. ```shell -To compile: -% export TZ=America/Los_Angeles % mkdir build % cd build % cmake .. 
http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/Adaptor.hh.in ---------------------------------------------------------------------- diff --git a/c++/src/Adaptor.hh.in b/c++/src/Adaptor.hh.in index 5b3a677..568054b 100644 --- a/c++/src/Adaptor.hh.in +++ b/c++/src/Adaptor.hh.in @@ -87,6 +87,14 @@ #define UINT32_MAX 0xffffffff #endif +#ifndef INT64_MAX + #define INT64_MAX 0x7fffffffffffffff +#endif + +#ifndef INT64_MIN + #define INT64_MIN (-0x7fffffffffffffff - 1) +#endif + #define GTEST_LANG_CXX11 0 #endif /* ADAPTER_HH */ http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt index b7b5260..545e3ff 100644 --- a/c++/src/CMakeLists.txt +++ b/c++/src/CMakeLists.txt @@ -70,8 +70,11 @@ CHECK_CXX_SOURCE_RUNS(" ) CHECK_CXX_SOURCE_RUNS(" + #include<stdlib.h> #include<time.h> int main(int, char *[]) { + setenv(\"TZ\", \"America/Los_Angeles\", 1); + tzset(); struct tm time2037; struct tm time2038; strptime(\"2037-05-05 12:34:56\", \"%Y-%m-%d %H:%M:%S\", &time2037); @@ -119,6 +122,7 @@ add_library (orc STATIC RLEv1.cc RLEv2.cc RLE.cc + Timezone.cc TypeImpl.cc Vector.cc ) http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/ColumnPrinter.cc ---------------------------------------------------------------------- diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc index 764da01..2462143 100644 --- a/c++/src/ColumnPrinter.cc +++ b/c++/src/ColumnPrinter.cc @@ -712,7 +712,7 @@ namespace orc { int64_t nanos = nanoseconds[rowId]; time_t secs = static_cast<time_t>(seconds[rowId]); struct tm tmValue; - localtime_r(&secs, &tmValue); + gmtime_r(&secs, &tmValue); char timeBuffer[20]; strftime(timeBuffer, sizeof(timeBuffer), "%Y-%m-%d %H:%M:%S", &tmValue); writeChar(buffer, '"'); http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/ColumnReader.cc 
---------------------------------------------------------------------- diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index ae4d9b6..8db16be 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -265,6 +265,7 @@ namespace orc { private: std::unique_ptr<orc::RleDecoder> secondsRle; std::unique_ptr<orc::RleDecoder> nanoRle; + const Timezone& writerTimezone; const int64_t epochOffset; public: @@ -281,8 +282,9 @@ namespace orc { TimestampColumnReader::TimestampColumnReader(const Type& type, StripeStreams& stripe - ): ColumnReader(type, stripe), - epochOffset(stripe.getEpochOffset()) { + ): ColumnReader(type, stripe), + writerTimezone(stripe.getWriterTimezone()), + epochOffset(writerTimezone.getEpoch()) { RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); secondsRle = createRleDecoder(stripe.getStream(columnId, proto::Stream_Kind_DATA, @@ -327,7 +329,9 @@ namespace orc { nanoBuffer[i] *= 10; } } - secsBuffer[i] += epochOffset; + int64_t writerTime = secsBuffer[i] + epochOffset; + secsBuffer[i] = writerTime + + writerTimezone.getVariant(writerTime).gmtOffset; if (secsBuffer[i] < 0 && nanoBuffer[i] != 0) { secsBuffer[i] -= 1; } http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/ColumnReader.hh ---------------------------------------------------------------------- diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh index 142d41e..5cfd781 100644 --- a/c++/src/ColumnReader.hh +++ b/c++/src/ColumnReader.hh @@ -22,6 +22,7 @@ #include "orc/Vector.hh" #include "ByteRLE.hh" #include "Compression.hh" +#include "Timezone.hh" #include "wrap/orc-proto-wrapper.hh" namespace orc { @@ -65,11 +66,9 @@ namespace orc { virtual MemoryPool& getMemoryPool() const = 0; /** - * Get the number of seconds between the ORC epoch and Unix epoch. - * ORC epoch is 1 Jan 2015 00:00:00 local. - * Unix epoch is 1 Jan 1970 00:00:00 UTC. + * Get the writer's timezone, so that we can convert their dates correctly. 
*/ - virtual int64_t getEpochOffset() const = 0; + virtual const Timezone& getWriterTimezone() const = 0; }; /** http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/Reader.cc ---------------------------------------------------------------------- diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 29bd439..2aa3ef5 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -1062,7 +1062,7 @@ namespace orc { class ReaderImpl : public Reader { private: - const int64_t epochOffset; + const std::string localTimezone; // inputs std::unique_ptr<InputStream> stream; @@ -1215,27 +1215,13 @@ namespace orc { } } - int64_t getEpochOffset() { - // Build the literal for the ORC epoch - // 2015 Jan 1 00:00:00 - struct tm epoch; - epoch.tm_sec = 0; - epoch.tm_min = 0; - epoch.tm_hour = 0; - epoch.tm_mday = 1; - epoch.tm_mon = 0; - epoch.tm_year = 2015 - 1900; - epoch.tm_isdst = 0; - return static_cast<int64_t>(mktime(&epoch)); - } - ReaderImpl::ReaderImpl(std::unique_ptr<InputStream> input, const ReaderOptions& opts, std::unique_ptr<proto::PostScript> _postscript, std::unique_ptr<proto::Footer> _footer, uint64_t _fileLength, uint64_t _postscriptLength - ): epochOffset(getEpochOffset()), + ): localTimezone(getLocalTimezoneName()), stream(std::move(input)), options(opts), fileLength(_fileLength), @@ -1588,7 +1574,7 @@ namespace orc { const uint64_t stripeStart; InputStream& input; MemoryPool& memoryPool; - const int64_t epochOffset; + const Timezone& writerTimezone; public: StripeStreamsImpl(const ReaderImpl& reader, @@ -1596,7 +1582,7 @@ namespace orc { uint64_t stripeStart, InputStream& input, MemoryPool& memoryPool, - int64_t epochOffset); + const Timezone& writerTimezone); virtual ~StripeStreamsImpl(); @@ -1614,7 +1600,7 @@ namespace orc { MemoryPool& getMemoryPool() const override; - int64_t getEpochOffset() const override; + const Timezone& getWriterTimezone() const override; }; uint64_t maxStreamsForType(const proto::Type& type) { @@ -1727,13 +1713,13 @@ 
namespace orc { uint64_t _stripeStart, InputStream& _input, MemoryPool& _memoryPool, - int64_t _epochOffset + const Timezone& _writerTimezone ): reader(_reader), footer(_footer), stripeStart(_stripeStart), input(_input), memoryPool(_memoryPool), - epochOffset(_epochOffset) { + writerTimezone(_writerTimezone) { // PASS } @@ -1754,8 +1740,8 @@ namespace orc { return footer.columns(static_cast<int>(columnId)); } - int64_t StripeStreamsImpl::getEpochOffset() const { - return epochOffset; + const Timezone& StripeStreamsImpl::getWriterTimezone() const { + return writerTimezone; } std::unique_ptr<SeekableInputStream> @@ -1795,11 +1781,15 @@ namespace orc { currentStripeInfo = footer->stripes(static_cast<int>(currentStripe)); currentStripeFooter = getStripeFooter(currentStripeInfo); rowsInCurrentStripe = currentStripeInfo.numberofrows(); + const Timezone& writerTimezone = + (currentStripeFooter.has_writertimezone() ? + getTimezoneByName(currentStripeFooter.writertimezone()) : + getTimezoneByName(localTimezone)); StripeStreamsImpl stripeStreams(*this, currentStripeFooter, currentStripeInfo.offset(), *(stream.get()), memoryPool, - epochOffset); + writerTimezone); reader = buildReader(*(schema.get()), stripeStreams); } http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/Timezone.cc ---------------------------------------------------------------------- diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc new file mode 100644 index 0000000..0d6ff93 --- /dev/null +++ b/c++/src/Timezone.cc @@ -0,0 +1,964 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Timezone.hh" + +#include <errno.h> +#include <iostream> +#include <fcntl.h> +#include <map> +#include <sstream> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <time.h> +#include <unistd.h> + +namespace orc { + + // default location of the timezone files + static const char DEFAULT_TZDIR[] = "/usr/share/zoneinfo"; + + // location of a symlink to the local timezone + static const char LOCAL_TIMEZONE[] = "/etc/localtime"; + + enum TransitionKind { + TRANSITION_JULIAN, + TRANSITION_DAY, + TRANSITION_MONTH + }; + + static const int64_t MONTHS_PER_YEAR = 12; + /** + * The number of days in each month in non-leap and leap years. + */ + static const int64_t DAYS_PER_MONTH[2][MONTHS_PER_YEAR] = + {{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + static const int64_t SECONDS_PER_HOUR = 60 * 60; + static const int64_t SECONDS_PER_DAY = SECONDS_PER_HOUR * 24; + static const int64_t DAYS_PER_WEEK = 7; + + // Leap years and day of the week repeat every 400 years, which makes it + // a good cycle length. + static const int64_t SECONDS_PER_400_YEARS = + SECONDS_PER_DAY * (365 * (300 + 3) + 366 * (100 - 3)); + + /** + * Is the given year a leap year? + */ + bool isLeap(int64_t year) { + return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); + } + + /** + * Find the position that is the closest and less than or equal to the + * target. 
+ * @return -1 if the array is empty or the target < array[0], otherwise + * i such that array[i] <= target and (i == n - 1 or target < array[i+1]) + */ + int64_t binarySearch(const std::vector<int64_t> &array, int64_t target) { + // An empty array has no element <= target; without this guard + // array.size() - 1 wraps around and array[mid] reads out of bounds. + if (array.empty()) { + return -1; + } + uint64_t min = 0; + uint64_t max = array.size() - 1; + uint64_t mid = (min + max) / 2; + while ((array[mid] != target) && (min < max)) { + if (array[mid] < target) { + min = mid + 1; + } else if (mid == 0) { + max = 0; + } else { + max = mid - 1; + } + mid = (min + max) / 2; + } + if (target < array[mid]) { + return static_cast<int64_t>(mid) - 1; + } else { + return static_cast<int64_t>(mid); + } + } + + struct Transition { + TransitionKind kind; + int64_t day; + int64_t week; + int64_t month; + int64_t time; + + std::string toString() const { + std::stringstream buffer; + switch (kind) { + case TRANSITION_JULIAN: + buffer << "julian " << day; + break; + case TRANSITION_DAY: + buffer << "day " << day; + break; + case TRANSITION_MONTH: + buffer << "month " << month << " week " << week << " day " << day; + break; + } + buffer << " at " << (time / (60 * 60)) << ":" << ((time / 60) % 60) + << ":" << (time % 60); + return buffer.str(); + } + + /** + * Get the transition time for the given year. + * @param year the year + * @return the number of seconds past local Jan 1 00:00:00 that the + * transition happens. + */ + int64_t getTime(int64_t year) const { + int64_t result = time; + switch (kind) { + case TRANSITION_JULIAN: + result += SECONDS_PER_DAY * day; + if (day > 60 && isLeap(year)) { + result += SECONDS_PER_DAY; + } + break; + case TRANSITION_DAY: + result += SECONDS_PER_DAY * day; + break; + case TRANSITION_MONTH: { + bool inLeap = isLeap(year); + int64_t adjustedMonth = (month + 9) % 12 + 1; + int64_t adjustedYear = (month <= 2) ? 
(year - 1) : year; + int64_t adjustedCentury = adjustedYear / 100; + int64_t adjustedRemainder = adjustedYear % 100; + + // day of the week of the first day of month + int64_t dayOfWeek = ((26 * adjustedMonth - 2) / 10 + + 1 + adjustedRemainder + adjustedRemainder / 4 + + adjustedCentury / 4 - 2 * adjustedCentury) % 7; + if (dayOfWeek < 0) { + dayOfWeek += DAYS_PER_WEEK; + } + + int64_t d = day - dayOfWeek; + if (d < 0) { + d += DAYS_PER_WEEK; + } + for (int w = 1; w < week; ++w) { + if (d + DAYS_PER_WEEK >= DAYS_PER_MONTH[inLeap][month - 1]) { + break; + } + d += DAYS_PER_WEEK; + } + result += d * SECONDS_PER_DAY; + + // Add in the time for the month + for(int m=0; m < month - 1; ++m) { + result += DAYS_PER_MONTH[inLeap][m] * SECONDS_PER_DAY; + } + break; + } + } + return result; + } + }; + + /** + * The current rule for finding timezone variants arbitrarily far in + * the future. They are based on a string representation that + * specifies the standard name and offset. For timezones with + * daylight savings, the string specifies the daylight variant name + * and offset and the rules for switching between them. + * + * rule = <standard name><standard offset><daylight>? + * name = string with no numbers or '+', '-', or ',' + * offset = [-+]?hh(:mm(:ss)?)? + * daylight = <name><offset>,<start day>(/<offset>)?,<end day>(/<offset>)? + * day = J<day without 2/29>|<day with 2/29>|M<month>.<week>.<day of week> + */ + class FutureRuleImpl: public FutureRule { + std::string ruleString; + TimezoneVariant standard; + bool hasDst; + TimezoneVariant dst; + Transition start; + Transition end; + + // expanded time_t offsets of transitions + std::vector<int64_t> offsets; + + // Is the epoch (1 Jan 1970 00:00) in standard time? + // This code assumes that the transition dates fall in the same order + // each year. Hopefully no timezone regions decide to move across the + // equator, which is about what it would take. 
+ bool startInStd; + + void computeOffsets() { + if (!hasDst) { + startInStd = true; + offsets.resize(1); + } else { + // Insert a transition for the epoch and two per a year for the next + // 400 years. We assume that the all even positions are in standard + // time if and only if startInStd and the odd ones are the reverse. + offsets.resize(400 * 2 + 1); + startInStd = start.getTime(1970) < end.getTime(1970); + int64_t base = 0; + for(int64_t year = 1970; year < 1970 + 400; ++year) { + if (startInStd) { + offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = + base + start.getTime(year) - standard.gmtOffset; + offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = + base + end.getTime(year) - dst.gmtOffset; + } else { + offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = + base + end.getTime(year) - dst.gmtOffset; + offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = + base + start.getTime(year) - standard.gmtOffset; + } + base += (isLeap(year) ? 366 : 365) * SECONDS_PER_DAY; + } + } + offsets[0] = 0; + } + + public: + virtual ~FutureRuleImpl(); + bool isDefined() const override; + const TimezoneVariant& getVariant(int64_t clk) const override; + void print(std::ostream& out) const override; + + friend class FutureRuleParser; + }; + + FutureRule::~FutureRule() { + // PASS + } + + FutureRuleImpl::~FutureRuleImpl() { + // PASS + } + + bool FutureRuleImpl::isDefined() const { + return ruleString.size() > 0; + } + + const TimezoneVariant& FutureRuleImpl::getVariant(int64_t clk) const { + if (!hasDst) { + return standard; + } else { + int64_t adjusted = clk % SECONDS_PER_400_YEARS; + if (adjusted < 0) { + adjusted += SECONDS_PER_400_YEARS; + } + int64_t idx = binarySearch(offsets, adjusted); + if (startInStd == (idx % 2 == 0)) { + return standard; + } else { + return dst; + } + } + } + + void FutureRuleImpl::print(std::ostream& out) const { + if (isDefined()) { + out << " Future rule: " << ruleString << "\n"; + out << " standard " << standard.toString() << "\n"; + 
if (hasDst) { + out << " dst " << dst.toString() << "\n"; + out << " start " << start.toString() << "\n"; + out << " end " << end.toString() << "\n"; + } + } + } + + /** + * A parser for the future rule strings. + */ + class FutureRuleParser { + public: + FutureRuleParser(const std::string& str, + FutureRuleImpl* rule + ): ruleString(str), + length(str.size()), + position(0), + output(*rule) { + output.ruleString = str; + if (position != length) { + parseName(output.standard.name); + output.standard.gmtOffset = -parseOffset(); + output.standard.isDst = false; + output.hasDst = position < length; + if (output.hasDst) { + parseName(output.dst.name); + output.dst.isDst = true; + if (ruleString[position] != ',') { + output.dst.gmtOffset = -parseOffset(); + } else { + output.dst.gmtOffset = output.standard.gmtOffset + 60 * 60; + } + parseTransition(output.start); + parseTransition(output.end); + } + if (position != length) { + throwError("Extra text"); + } + output.computeOffsets(); + } + } + + private: + + const std::string& ruleString; + size_t length; + size_t position; + FutureRuleImpl &output; + + void throwError(const char *msg) { + std::stringstream buffer; + buffer << msg << " at " << position << " in '" << ruleString << "'"; + throw TimezoneError(buffer.str()); + } + + /** + * Parse the names of the form: + * ([^-+0-9,]+|<[^>]+>) + * and set the output string. 
+ */ + void parseName(std::string& result) { + if (position == length) { + throwError("name required"); + } + size_t start = position; + if (ruleString[position] == '<') { + while (position < length && ruleString[position] != '>') { + position += 1; + } + if (position == length) { + throwError("missing close '>'"); + } + position +=1; + } else { + while (position < length) { + char ch = ruleString[position]; + if (isdigit(ch) || ch == '-' || ch == '+' || ch == ',') { + break; + } + position += 1; + } + } + if (position == start) { + throwError("empty string not allowed"); + } + result = ruleString.substr(start, position - start); + } + + /** + * Parse an integer of the form [0-9]+ and return it. + */ + int64_t parseNumber() { + if (position >= length) { + throwError("missing number"); + } + int64_t result = 0; + while (position < length) { + char ch = ruleString[position]; + if (isdigit(ch)) { + result = result * 10 + (ch - '0'); + position += 1; + } else { + break; + } + } + return result; + } + + /** + * Parse the offsets of the form: + * [-+]?[0-9]+(:[0-9]+(:[0-9]+)?)? + * and convert it into a number of seconds. + */ + int64_t parseOffset() { + int64_t scale = 3600; + bool isNegative = false; + if (position < length) { + char ch = ruleString[position]; + isNegative = ch == '-'; + if (ch == '-' || ch == '+') { + position += 1; + } + } + int64_t result = parseNumber() * scale; + while (position < length && scale > 1 && ruleString[position] == ':') { + scale /= 60; + position += 1; + result += parseNumber() * scale; + } + if (isNegative) { + result = -result; + } + return result; + } + + /** + * Parse a transition of the following form: + * ,(J<number>|<number>|M<number>.<number>.<number>)(/<offset>)? 
+ */ + void parseTransition(Transition& transition) { + if (length - position < 2 || ruleString[position] != ',') { + throwError("missing transition"); + } + position += 1; + char ch = ruleString[position]; + if (ch == 'J') { + transition.kind = TRANSITION_JULIAN; + position += 1; + transition.day = parseNumber(); + } else if (ch == 'M') { + transition.kind = TRANSITION_MONTH; + position += 1; + transition.month = parseNumber(); + if (position == length || ruleString[position] != '.') { + throwError("missing first ."); + } + position += 1; + transition.week = parseNumber(); + if (position == length || ruleString[position] != '.') { + throwError("missing second ."); + } + position += 1; + transition.day = parseNumber(); + } else { + transition.kind = TRANSITION_DAY; + transition.day = parseNumber(); + } + if (position < length && ruleString[position] == '/') { + position += 1; + transition.time = parseOffset(); + } else { + transition.time = 2 * 60 * 60; + } + } + }; + + /** + * Parse the POSIX TZ string. + */ + std::unique_ptr<FutureRule> parseFutureRule(const std::string& ruleString) { + std::unique_ptr<FutureRule> result(new FutureRuleImpl()); + FutureRuleParser parser(ruleString, + dynamic_cast<FutureRuleImpl*>(result.get())); + return result; + } + + std::string TimezoneVariant::toString() const { + std::stringstream buffer; + buffer << name << " " << gmtOffset; + if (isDst) { + buffer << " (dst)"; + } + return buffer.str(); + } + + /** + * An abstraction of the differences between versions. + */ + class VersionParser { + public: + virtual ~VersionParser(); + + /** + * Get the version number. + */ + virtual uint64_t getVersion() const = 0; + + /** + * Get the number of bytes + */ + virtual uint64_t getTimeSize() const = 0; + + /** + * Parse the time at the given location. 
+ */ + virtual int64_t parseTime(const unsigned char* ptr) const = 0; + + /** + * Parse the future string + */ + virtual std::string parseFutureString(const unsigned char *ptr, + uint64_t offset, + uint64_t length) const = 0; + }; + + VersionParser::~VersionParser() { + // PASS + } + + /** + * Decode the four bytes at ptr as a big-endian unsigned 32-bit value. + * Each byte is widened to uint32_t before shifting: shifting the + * int-promoted byte left by 24 overflows a signed int when the top + * bit of ptr[0] is set. + */ + static uint32_t decode32(const unsigned char* ptr) { + return (static_cast<uint32_t>(ptr[0]) << 24) | + (static_cast<uint32_t>(ptr[1]) << 16) | + (static_cast<uint32_t>(ptr[2]) << 8) | + static_cast<uint32_t>(ptr[3]); + } + + class Version1Parser: public VersionParser { + public: + virtual ~Version1Parser(); + + virtual uint64_t getVersion() const override { + return 1; + } + + /** + * Get the number of bytes + */ + virtual uint64_t getTimeSize() const override { + return 4; + } + + /** + * Parse the time at the given location. + */ + virtual int64_t parseTime(const unsigned char* ptr) const override { + // sign extend from 32 bits + return static_cast<int32_t>(decode32(ptr)); + } + + virtual std::string parseFutureString(const unsigned char *, + uint64_t, + uint64_t) const override { + return ""; + } + }; + + Version1Parser::~Version1Parser() { + // PASS + } + + class Version2Parser: public VersionParser { + public: + virtual ~Version2Parser(); + + virtual uint64_t getVersion() const override { + return 2; + } + + /** + * Get the number of bytes + */ + virtual uint64_t getTimeSize() const override { + return 8; + } + + /** + * Parse the time at the given location. 
+ */ + virtual int64_t parseTime(const unsigned char* ptr) const override { + return static_cast<int64_t>(decode32(ptr)) << 32 | decode32(ptr + 4); + } + + virtual std::string parseFutureString(const unsigned char *ptr, + uint64_t offset, + uint64_t length) const override { + return std::string(reinterpret_cast<const char*>(ptr) + offset + 1, + length - 2); + } + }; + + Version2Parser::~Version2Parser() { + // PASS + } + + class TimezoneImpl: public Timezone { + public: + TimezoneImpl(const std::string& name, + const std::vector<unsigned char> bytes); + virtual ~TimezoneImpl(); + + /** + * Get the variant for the given time (time_t). + */ + const TimezoneVariant& getVariant(int64_t clk) const override; + + void print(std::ostream&) const override; + + uint64_t getVersion() const override { + return version; + } + + int64_t getEpoch() const override { + return epoch; + } + + private: + void parseTimeVariants(const unsigned char* ptr, + uint64_t variantOffset, + uint64_t variantCount, + uint64_t nameOffset, + uint64_t nameCount); + void parseZoneFile(const unsigned char* ptr, + uint64_t sectionOffset, + uint64_t fileLength, + const VersionParser& version); + // filename + std::string filename; + + // the version of the file + uint64_t version; + + // the list of variants for this timezone + std::vector<TimezoneVariant> variants; + + // the list of the times where the local rules change + std::vector<int64_t> transitions; + + // the variant that starts at this transition. + std::vector<uint64_t> currentVariant; + + // the variant before the first transition + uint64_t ancientVariant; + + // the rule for future times + std::unique_ptr<FutureRule> futureRule; + + // the last explicit transition after which we use the future rule + int64_t lastTransition; + + // The ORC epoch time in this timezone. 
+ int64_t epoch; + }; + + DIAGNOSTIC_PUSH + #ifdef __clang__ + DIAGNOSTIC_IGNORE("-Wglobal-constructors") + DIAGNOSTIC_IGNORE("-Wexit-time-destructors") + #endif + static std::map<std::string, Timezone*> timezoneCache; + DIAGNOSTIC_POP + + Timezone::~Timezone() { + // PASS + } + + TimezoneImpl::TimezoneImpl(const std::string& _filename, + const std::vector<unsigned char> buffer + ): filename(_filename) { + parseZoneFile(&buffer[0], 0, buffer.size(), Version1Parser()); + // Build the literal for the ORC epoch + // 2015 Jan 1 00:00:00 + tm epochStruct; + epochStruct.tm_sec = 0; + epochStruct.tm_min = 0; + epochStruct.tm_hour = 0; + epochStruct.tm_mday = 1; + epochStruct.tm_mon = 0; + epochStruct.tm_year = 2015 - 1900; + epochStruct.tm_isdst = 0; + time_t utcEpoch = timegm(&epochStruct); + epoch = utcEpoch - getVariant(utcEpoch).gmtOffset; + } + + const char* getTimezoneDirectory() { + const char *dir = getenv("TZDIR"); + if (!dir) { + dir = DEFAULT_TZDIR; + } + return dir; + } + + std::string getLocalTimezoneName() { + // use the TZ environment variable, if it is set. 
+ const char *tz = getenv("TZ"); + if (tz != nullptr) { + return std::string(tz); + } + // otherwise look at where /etc/localtime points and use that + struct stat linkStatus; + if (lstat(LOCAL_TIMEZONE, &linkStatus) == -1) { + throw TimezoneError(std::string("Can't stat local timezone link ") + + LOCAL_TIMEZONE + ": " + + strerror(errno)); + } + std::vector<char> buffer(static_cast<size_t>(linkStatus.st_size + 1)); + ssize_t len = readlink(LOCAL_TIMEZONE, &buffer[0], buffer.size()); + if (len == -1 || static_cast<size_t>(len) >= buffer.size()) { + throw TimezoneError(std::string("Can't read local timezone link ") + + LOCAL_TIMEZONE + ": " + + strerror(errno)); + } + buffer[static_cast<size_t>(len)] = 0; + const char *dirName = getTimezoneDirectory(); + size_t dirLength = strlen(dirName); + if (strncmp(dirName, &buffer[0], dirLength) != 0) { + throw TimezoneError(std::string("Local timezone link is not in ") + + dirName); + } + return std::string(&buffer[0] + dirLength); + } + + /** + * Get a timezone by name (eg. America/Los_Angeles). + * Results are cached. 
+ */ + const Timezone& getTimezoneByName(const std::string& zone) { + std::map<std::string, Timezone*>::iterator itr = timezoneCache.find(zone); + if (itr != timezoneCache.end()) { + return *(itr->second); + } + std::string filename(getTimezoneDirectory()); + filename += "/"; + filename += zone; + int in = open(filename.c_str(), O_RDONLY); + if (in == -1) { + std::stringstream buffer; + buffer << "failed to open " << filename << " - " << strerror(errno); + throw TimezoneError(buffer.str()); + } + struct stat fileInfo; + if (fstat(in, &fileInfo) == -1) { + std::stringstream buffer; + buffer << "failed to stat " << filename << " - " << strerror(errno); + throw TimezoneError(buffer.str()); + } + if ((fileInfo.st_mode & S_IFMT) != S_IFREG) { + std::stringstream buffer; + buffer << "non-file in tzfile reader " << filename; + throw TimezoneError(buffer.str()); + } + size_t size = static_cast<size_t>(fileInfo.st_size); + std::vector<unsigned char> buffer(size); + size_t posn = 0; + while (posn < size) { + ssize_t ret = read(in, &buffer[posn], size - posn); + if (ret == -1) { + throw TimezoneError(std::string("Failure to read timezone file ") + + filename + " - " + strerror(errno)); + } + posn += static_cast<size_t>(ret); + } + if (close(in) == -1) { + std::stringstream err; + err << "failed to close " << filename << " - " << strerror(errno); + throw TimezoneError(err.str()); + } + Timezone* result = new TimezoneImpl(filename, buffer); + timezoneCache[zone] = result; + return *result; + } + + /** + * Parse a set of bytes as a timezone file as if they came from filename. 
+ */ + std::unique_ptr<Timezone> getTimezone(const std::string& filename, + const std::vector<unsigned char>& b){ + return std::unique_ptr<Timezone>(new TimezoneImpl(filename, b)); + } + + TimezoneImpl::~TimezoneImpl() { + // PASS + } + + void TimezoneImpl::parseTimeVariants(const unsigned char* ptr, + uint64_t variantOffset, + uint64_t variantCount, + uint64_t nameOffset, + uint64_t nameCount) { + for(uint64_t variant=0; variant < variantCount; ++variant) { + variants[variant].gmtOffset = + static_cast<int32_t>(decode32(ptr + variantOffset + 6 * variant)); + variants[variant].isDst = ptr[variantOffset + 6 * variant + 4]; + uint nameStart = ptr[variantOffset + 6 * variant + 5]; + if (nameStart >= nameCount) { + std::stringstream buffer; + buffer << "name out of range in variant " << variant + << " - " << nameStart << " >= " << nameCount; + throw TimezoneError(buffer.str()); + } + variants[variant].name = std::string(reinterpret_cast<const char*>(ptr) + + nameOffset + nameStart); + } + } + + /** + * Parse the zone file to get the bits we need. 
+ * There are two versions of the timezone file: + * + * Version 1(version = 0x00): + * Magic(version) + * Header + * TransitionTimes(4 byte) + * TransitionRules + * Rules + * LeapSeconds(4 byte) + * IsStd + * IsGmt + * + * Version2: + * Version1(0x32) = a version 1 copy of the data for old clients + * Magic(0x32) + * Header + * TransitionTimes(8 byte) + * TransitionRules + * Rules + * LeapSeconds(8 byte) + * IsStd + * IsGmt + * FutureString + */ + void TimezoneImpl::parseZoneFile(const unsigned char *ptr, + uint64_t sectionOffset, + uint64_t fileLength, + const VersionParser& versionParser) { + const uint64_t magicOffset = sectionOffset + 0; + const uint64_t headerOffset = magicOffset + 20; + + // check for validity before we start parsing + if (fileLength < headerOffset + 6 * 4 || + strncmp(reinterpret_cast<const char*>(ptr) + magicOffset, "TZif", 4) + != 0) { + std::stringstream buffer; + buffer << "non-tzfile " << filename; + throw TimezoneError(buffer.str()); + } + + const uint64_t isGmtCount = decode32(ptr + headerOffset + 0); + const uint64_t isStdCount = decode32(ptr + headerOffset + 4); + const uint64_t leapCount = decode32(ptr + headerOffset + 8); + const uint64_t timeCount = decode32(ptr + headerOffset + 12); + const uint64_t variantCount = decode32(ptr + headerOffset + 16); + const uint64_t nameCount = decode32(ptr + headerOffset + 20); + + const uint64_t timeOffset = headerOffset + 24; + const uint64_t timeVariantOffset = + timeOffset + versionParser.getTimeSize() * timeCount; + const uint64_t variantOffset = timeVariantOffset + timeCount; + const uint64_t nameOffset = variantOffset + variantCount * 6; + const uint64_t sectionLength = nameOffset + nameCount + + (versionParser.getTimeSize() + 4) * leapCount + + isGmtCount + isStdCount; + + if (sectionLength > fileLength) { + std::stringstream buffer; + buffer << "tzfile too short " << filename + << " needs " << sectionLength << " and has " << fileLength; + throw TimezoneError(buffer.str()); + } + + // 
if it is version 2, skip over the old layout and read the new one. + if (sectionOffset == 0 && ptr[magicOffset + 4] != 0) { + parseZoneFile(ptr, sectionLength, fileLength, Version2Parser()); + return; + } + version = versionParser.getVersion(); + variants.resize(variantCount); + transitions.resize(timeCount); + currentVariant.resize(timeCount); + parseTimeVariants(ptr, variantOffset, variantCount, nameOffset, + nameCount); + bool foundAncient = false; + for(uint64_t t=0; t < timeCount; ++t) { + transitions[t] = + versionParser.parseTime(ptr + timeOffset + + t * versionParser.getTimeSize()); + currentVariant[t] = ptr[timeVariantOffset + t]; + if (currentVariant[t] >= variantCount) { + std::stringstream buffer; + buffer << "tzfile rule out of range " << filename + << " references rule " << currentVariant[t] + << " of " << variantCount; + throw TimezoneError(buffer.str()); + } + // find the oldest standard time and use that as the ancient value + if (!foundAncient && + !variants[currentVariant[t]].isDst) { + foundAncient = true; + ancientVariant = currentVariant[t]; + } + } + if (!foundAncient) { + ancientVariant = 0; + } + futureRule = parseFutureRule(versionParser.parseFutureString + (ptr, sectionLength, + fileLength - sectionLength)); + + // find the lower bound for applying the future rule + if (futureRule->isDefined()) { + if (timeCount > 0) { + lastTransition = transitions[timeCount - 1]; + } else { + lastTransition = INT64_MIN; + } + } else { + lastTransition = INT64_MAX; + } + } + + const TimezoneVariant& TimezoneImpl::getVariant(int64_t clk) const { + // if it is after the last explicit entry in the table, + // use the future rule to get an answer + if (clk > lastTransition) { + return futureRule->getVariant(clk); + } else { + int64_t idx = binarySearch(transitions, clk); + if (idx < 0) { + idx = static_cast<int64_t>(ancientVariant); + } + return variants[currentVariant[static_cast<size_t>(idx)]]; + } + } + + void TimezoneImpl::print(std::ostream& out) const 
{ + out << "Timezone file: " << filename << "\n"; + out << " Version: " << version << "\n"; + futureRule->print(out); + for(uint64_t r=0; r < variants.size(); ++r) { + out << " Variant " << r << ": " + << variants[r].toString() << "\n"; + } + for(uint64_t t=0; t < transitions.size(); ++t) { + tm timeStruct; + tm* result = nullptr; + char buffer[25]; + if (sizeof(time_t) >= 8) { + time_t val = transitions[t]; + result = gmtime_r(&val, &timeStruct); + if (result) { + strftime(buffer, sizeof(buffer), "%F %H:%M:%S", &timeStruct); + } + } + std::cout << " Transition: " << (result == nullptr ? "null" : buffer) + << " (" << transitions[t] << ") -> " + << variants[currentVariant[t]].name + << "\n"; + } + } + + TimezoneError::TimezoneError(const std::string& what + ): std::runtime_error(what) { + // PASS + } + + TimezoneError::TimezoneError(const TimezoneError& other + ): std::runtime_error(other) { + // PASS + } + + TimezoneError::~TimezoneError() ORC_NOEXCEPT { + // PASS + } + +} http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/src/Timezone.hh ---------------------------------------------------------------------- diff --git a/c++/src/Timezone.hh b/c++/src/Timezone.hh new file mode 100644 index 0000000..e35f170 --- /dev/null +++ b/c++/src/Timezone.hh @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TIMEZONE_HH +#define TIMEZONE_HH + +// This file is for timezone routines. + +#include "Adaptor.hh" + +#include <memory> +#include <stdexcept> +#include <stdint.h> +#include <string> +#include <vector> + +namespace orc { + + /** + * A variant (eg. PST or PDT) of a timezone (eg. America/Los_Angeles). + */ + struct TimezoneVariant { + int64_t gmtOffset; + bool isDst; + std::string name; + + std::string toString() const; + }; + + /** + * A region that shares the same legal rules for wall clock time and + * day light savings transitions. They are typically named for the largest + * city in the region (eg. America/Los_Angeles or America/Mexico_City). + */ + class Timezone { + public: + virtual ~Timezone(); + + /** + * Get the variant for the given time (time_t). + */ + virtual const TimezoneVariant& getVariant(int64_t clk) const = 0; + + /** + * Get the number of seconds between the ORC epoch in this timezone + * and Unix epoch. + * ORC epoch is 1 Jan 2015 00:00:00 local. + * Unix epoch is 1 Jan 1970 00:00:00 UTC. + */ + virtual int64_t getEpoch() const = 0; + + /** + * Print the timezone to the stream. + */ + virtual void print(std::ostream&) const = 0; + + /** + * Get the version of the zone file. + */ + virtual uint64_t getVersion() const =0; + }; + + /** + * Get the local timezone name. + */ + std::string getLocalTimezoneName(); + + /** + * Get a timezone by name (eg. America/Los_Angeles). + * Results are cached. + */ + const Timezone& getTimezoneByName(const std::string& zone); + + /** + * Parse a set of bytes as a timezone file as if they came from filename. 
+ */ + std::unique_ptr<Timezone> getTimezone(const std::string& filename, + const std::vector<unsigned char>& b); + + class TimezoneError: public std::runtime_error { + public: + TimezoneError(const std::string& what); + TimezoneError(const TimezoneError&); + virtual ~TimezoneError() ORC_NOEXCEPT; + }; + + /** + * Represents the parsed POSIX timezone rule strings that are used to + * describe the future transitions, because they can go arbitrarily far into + * the future. + */ + class FutureRule { + public: + virtual ~FutureRule(); + virtual bool isDefined() const = 0; + virtual const TimezoneVariant& getVariant(int64_t clk) const = 0; + virtual void print(std::ostream& out) const = 0; + }; + + /** + * Parse the POSIX TZ string. + */ + std::unique_ptr<FutureRule> parseFutureRule(const std::string& ruleString); +} + +#endif http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/test/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/c++/test/CMakeLists.txt b/c++/test/CMakeLists.txt index 4a09e9e..69a0f6b 100644 --- a/c++/test/CMakeLists.txt +++ b/c++/test/CMakeLists.txt @@ -30,6 +30,7 @@ add_executable (test-orc TestDriver.cc TestInt128.cc TestRle.cc + TestTimezone.cc TestType.cc ) http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/test/TestColumnPrinter.cc ---------------------------------------------------------------------- diff --git a/c++/test/TestColumnPrinter.cc b/c++/test/TestColumnPrinter.cc index 8cc3a22..f617f3b 100644 --- a/c++/test/TestColumnPrinter.cc +++ b/c++/test/TestColumnPrinter.cc @@ -133,18 +133,18 @@ namespace orc { TimestampVectorBatch batch(1024, *getDefaultPool()); batch.numElements = 12; batch.hasNulls = false; - batch.data[0] = 1420099200; - batch.data[1] = 963298800; - batch.data[2] = 1426197659; - batch.data[3] = 1426197659; - batch.data[4] = 1426197659; - batch.data[5] = 1426197659; - batch.data[6] = 1426197659; - batch.data[7] = 1426197659; - batch.data[8] = 1426197659; - 
batch.data[9] = 1426197659; - batch.data[10] = 1426197659; - batch.data[11] = 1426197659; + batch.data[0] = 1420070400; + batch.data[1] = 963273600; + batch.data[2] = 1426172459; + batch.data[3] = 1426172459; + batch.data[4] = 1426172459; + batch.data[5] = 1426172459; + batch.data[6] = 1426172459; + batch.data[7] = 1426172459; + batch.data[8] = 1426172459; + batch.data[9] = 1426172459; + batch.data[10] = 1426172459; + batch.data[11] = 1426172459; batch.nanoseconds[0] = 0; batch.nanoseconds[1] = 0; batch.nanoseconds[2] = 0; http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/test/TestColumnReader.cc ---------------------------------------------------------------------- diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc index 075a069..18b3d67 100644 --- a/c++/test/TestColumnReader.cc +++ b/c++/test/TestColumnReader.cc @@ -25,6 +25,7 @@ #include "wrap/gtest-wrapper.h" #include "wrap/gmock.h" +#include <cmath> #include <iostream> #include <vector> @@ -49,9 +50,8 @@ public: return *getDefaultPool(); } - // the epoch offset for America/Los_Angeles - int64_t getEpochOffset() const { - return 1420099200; + const Timezone& getWriterTimezone() const override { + return getTimezoneByName("America/Los_Angeles"); } }; @@ -67,6 +67,10 @@ MockStripeStreams::getStream(uint64_t columnId, (getStreamProxy(columnId, kind, shouldStream)); } +bool isNotNull(tm *timeptr) { + return timeptr != nullptr; +} + TEST(TestColumnReader, testBooleanWithNulls) { MockStripeStreams streams; @@ -2380,7 +2384,7 @@ TEST(TestColumnReader, testFloatWithNulls) { EXPECT_EQ(0, doubleBatch->notNull[i]); } else if (i == 7) { EXPECT_EQ(1, doubleBatch->notNull[i]); - EXPECT_EQ(true, isnan(doubleBatch->data[i])); + EXPECT_EQ(true, std::isnan(doubleBatch->data[i])); } else { EXPECT_EQ(1, doubleBatch->notNull[i]); EXPECT_DOUBLE_EQ(test_vals[i], doubleBatch->data[i]); @@ -2544,7 +2548,7 @@ TEST(TestColumnReader, testDoubleWithNulls) { EXPECT_EQ(0, doubleBatch->notNull[i]) << "Wrong 
value at " << i; } else if (i == 7) { EXPECT_EQ(1, doubleBatch->notNull[i]) << "Wrong value at " << i; - EXPECT_EQ(true, isnan(doubleBatch->data[i])); + EXPECT_EQ(true, std::isnan(doubleBatch->data[i])); } else { EXPECT_EQ(1, doubleBatch->notNull[i]) << "Wrong value at " << i; EXPECT_DOUBLE_EQ(test_vals[i], doubleBatch->data[i]) @@ -2705,7 +2709,11 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) { } else { EXPECT_EQ(1, longBatch->notNull[i]); time_t time = static_cast<time_t>(longBatch->data[i]); - EXPECT_STREQ(expected[vals_ix], ctime(&time)); + tm timeStruct; + ASSERT_PRED1(isNotNull, gmtime_r(&time, &timeStruct)); + char buffer[30]; + asctime_r(&timeStruct, buffer); + EXPECT_STREQ(expected[vals_ix], buffer); EXPECT_EQ(expected_nano[vals_ix], longBatch->nanoseconds[i]); vals_ix++; } @@ -2724,7 +2732,11 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) { } else { EXPECT_EQ(1, longBatch->notNull[i]); time_t time = static_cast<time_t>(longBatch->data[i]); - EXPECT_STREQ(expected[vals_ix], ctime(&time)); + tm timeStruct; + ASSERT_PRED1(isNotNull, gmtime_r(&time, &timeStruct)); + char buffer[30]; + asctime_r(&timeStruct, buffer); + EXPECT_STREQ(expected[vals_ix], buffer); EXPECT_EQ(expected_nano[vals_ix], longBatch->nanoseconds[i]); vals_ix++; } @@ -2765,7 +2777,6 @@ TEST(TestColumnReader, testTimestamp) { .WillRepeatedly(testing::Return(new SeekableArrayInputStream (buffer1, ARRAY_SIZE(buffer1)))); - const unsigned char buffer2[] = { 0xf6, 0x00, 0xa8, 0xd1, 0xf9, 0xd6, 0x03, @@ -2822,7 +2833,11 @@ TEST(TestColumnReader, testTimestamp) { for (size_t i = 0; i < batch.numElements; ++i) { time_t time = static_cast<time_t>(longBatch->data[i]); - EXPECT_STREQ(expected[i], ctime(&time)) << "Wrong value at " << i; + tm timeStruct; + ASSERT_PRED1(isNotNull, gmtime_r(&time, &timeStruct)); + char buffer[30]; + asctime_r(&timeStruct, buffer); + EXPECT_STREQ(expected[i], buffer) << "Wrong value at " << i; EXPECT_EQ(expectedNano[i], longBatch->nanoseconds[i]); } } 
http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/c++/test/TestTimezone.cc ---------------------------------------------------------------------- diff --git a/c++/test/TestTimezone.cc b/c++/test/TestTimezone.cc new file mode 100644 index 0000000..13a14ee --- /dev/null +++ b/c++/test/TestTimezone.cc @@ -0,0 +1,361 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Adaptor.hh" +#include "Timezone.hh" +#include "wrap/gtest-wrapper.h" + +#include <iostream> +#include <unistd.h> +#include <vector> + +namespace orc { + + bool isLeap(int64_t year); + + TEST(TestTimezone, isLeap) { + EXPECT_TRUE(isLeap(2000)); + EXPECT_FALSE(isLeap(2001)); + EXPECT_TRUE(isLeap(2004)); + EXPECT_FALSE(isLeap(2100)); + EXPECT_FALSE(isLeap(2200)); + EXPECT_FALSE(isLeap(2300)); + EXPECT_TRUE(isLeap(2400)); + } + + int64_t binarySearch(const std::vector<int64_t> &array, int64_t target); + + TEST(TestTimezone, testBinarySearch) { + std::vector<int64_t> vect; + vect.push_back(0); + EXPECT_EQ(-1, binarySearch(vect, -5)); + EXPECT_EQ(0, binarySearch(vect, 0)); + EXPECT_EQ(0, binarySearch(vect, 5)); + vect.push_back(2); + EXPECT_EQ(-1, binarySearch(vect, -1)); + EXPECT_EQ(0, binarySearch(vect, 0)); + EXPECT_EQ(0, binarySearch(vect, 1)); + EXPECT_EQ(1, binarySearch(vect, 2)); + EXPECT_EQ(1, binarySearch(vect, 3)); + } + + /** + * Parse a future rule string and return the parsed rule as a string. 
+ */ + std::string stringifyRule(const std::string& ruleString) { + std::unique_ptr<FutureRule> rule = parseFutureRule(ruleString); + std::stringstream buffer; + rule->print(buffer); + return buffer.str(); + } + + + TEST(TestTimezone, parseFutureRule) { + EXPECT_EQ(" Future rule: FOO0\n standard FOO 0\n", + stringifyRule("FOO0")); + EXPECT_EQ(" Future rule: <FOO+->010:02\n standard <FOO+-> -36120\n", + stringifyRule("<FOO+->010:02")); + // unclosed '<' + EXPECT_THROW(stringifyRule("<FOO12"), TimezoneError); + // empty name + EXPECT_THROW(stringifyRule("+8"), TimezoneError); + // missing offset + EXPECT_THROW(stringifyRule("FOO"), TimezoneError); + EXPECT_EQ(" Future rule: FOO-123:45:67\n standard FOO 445567\n", + stringifyRule("FOO-123:45:67")); + EXPECT_EQ(" Future rule: FOO+8\n standard FOO -28800\n", + stringifyRule("FOO+8")); + EXPECT_EQ((" Future rule: FOO8BAR,J10,20\n standard FOO -28800\n" + " dst BAR -25200 (dst)\n start julian 10 at 2:0:0\n" + " end day 20 at 2:0:0\n"), + stringifyRule("FOO8BAR,J10,20")); + EXPECT_EQ((" Future rule: FOO+8BAR-0:30,M3.1.0,M10.5.6\n" + " standard FOO -28800\n" + " dst BAR 1800 (dst)\n" + " start month 3 week 1 day 0 at 2:0:0\n" + " end month 10 week 5 day 6 at 2:0:0\n"), + stringifyRule("FOO+8BAR-0:30,M3.1.0,M10.5.6")); + EXPECT_EQ((" Future rule: FOO10BAR1,3,4\n" + " standard FOO -36000\n" + " dst BAR -3600 (dst)\n" + " start day 3 at 2:0:0\n" + " end day 4 at 2:0:0\n"), + stringifyRule("FOO10BAR1,3,4")); + // missing transition times + EXPECT_THROW(stringifyRule("FOO8BAR"), TimezoneError); + // check left over text + EXPECT_THROW(stringifyRule("FOO8BAR,10,20x"), TimezoneError); + EXPECT_EQ((" Future rule: FOO8BAR,3/3,4/4:30\n" + " standard FOO -28800\n" + " dst BAR -25200 (dst)\n" + " start day 3 at 3:0:0\n" + " end day 4 at 4:30:0\n"), + stringifyRule("FOO8BAR,3/3,4/4:30")); + EXPECT_EQ((" Future rule: FOO-8BAR,J3/3,M4.5.6/00:04:0007\n" + " standard FOO 28800\n" + " dst BAR 32400 (dst)\n" + " start julian 3 at 3:0:0\n" + " 
end month 4 week 5 day 6 at 0:4:7\n"), + stringifyRule("FOO-8BAR,J3/3,M4.5.6/00:04:0007")); + // too many fields in offset + EXPECT_THROW(stringifyRule("FOO8BAR,10,20/4:30:20:10"), TimezoneError); + + EXPECT_FALSE(parseFutureRule("")->isDefined()); + EXPECT_TRUE(parseFutureRule("FOO12")->isDefined()); + } + + const std::string& getZoneFromRule(FutureRule *rule, + const std::string& date) { + tm timeStruct; + if (strptime(date.c_str(), "%F %H:%M:%S", &timeStruct) == nullptr) { + throw TimezoneError("bad time " + date); + } + return rule->getVariant(timegm(&timeStruct)).name; + } + + TEST(TestTimezone, useFutureRule) { + std::unique_ptr<FutureRule> rule = + parseFutureRule("FOO8BAR,M3.2.0,M11.1.0"); + // 1970 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-01-01 00:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-03-08 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-03-08 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-04-01 00:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-11-01 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-11-01 09:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-12-31 00:00:00")); + + // 2369 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2369-01-01 00:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2369-03-09 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "2369-03-09 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "2369-11-02 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2369-11-02 09:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2369-12-31 00:00:00")); + + // 2370 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2370-01-01 00:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2370-03-08 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "2370-03-08 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "2370-04-01 00:00:00")); + EXPECT_EQ("BAR", 
getZoneFromRule(rule.get(), "2370-11-01 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2370-11-01 09:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "2370-12-31 00:00:00")); + + rule = parseFutureRule("FOO8BAR,J10,J360"); + //1970 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-01-11 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-01-11 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-12-27 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-12-27 09:00:00")); + //1972 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1972-01-11 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1972-01-11 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1972-12-27 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1972-12-27 09:00:00")); + + rule = parseFutureRule("FOO8BAR,10,360"); + #ifdef HAS_PRE_1970 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1969-01-11 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1969-01-11 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1969-12-27 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1969-12-27 09:00:00")); + #endif + //1970 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-01-11 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-01-11 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-12-27 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-12-27 09:00:00")); + //1972 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1972-01-11 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1972-01-11 10:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1972-12-26 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1972-12-26 09:00:00")); + + // test a southern hemisphere timezone + rule = parseFutureRule("FOO8BAR,360,10"); + //1970 + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-01-11 08:59:59")); + EXPECT_EQ("FOO", 
getZoneFromRule(rule.get(), "1970-01-11 09:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-12-27 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-12-27 10:00:00")); + //1972 + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1972-01-11 08:59:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1972-01-11 09:00:00")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1972-12-26 09:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1972-12-26 10:00:00")); + + rule = parseFutureRule("FOO8BAR,J10/3,J360/3:30"); + //1970 + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-01-11 10:59:59")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-01-11 11:00:00")); + EXPECT_EQ("BAR", getZoneFromRule(rule.get(), "1970-12-27 10:29:59")); + EXPECT_EQ("FOO", getZoneFromRule(rule.get(), "1970-12-27 10:30:00")); + } + + unsigned char decodeBase64Char(unsigned char ch) { + switch (ch >> 4) { + case 4: case 5: return static_cast<unsigned char>(ch - 'A'); + case 6: case 7: return static_cast<unsigned char>(ch - 'a' + 26); + case 3: return static_cast<unsigned char>(ch - '0' + 52); + case 2: return static_cast<unsigned char>(ch == '+' ? 
62 : 63); + default: return 255; + } + } + + std::vector<unsigned char> decodeBase64(const std::string& input) { + std::vector<unsigned char> result; + result.reserve(3 * (input.size() / 4)); + uint32_t accumulator = 0; + uint32_t bits = 0; + for(uint32_t c=0; c < input.size() && input[c] != '='; ++c) { + accumulator = (accumulator << 6) | + decodeBase64Char(static_cast<unsigned char>(input[c])); + bits += 6; + if (bits >= 8) { + bits -= 8; + result.push_back(static_cast<unsigned char>(accumulator >> bits)); + accumulator &= ~(0xffffffff << bits); + } + } + return result; + } + + static const char *LA_VER1 = + ("VFppZgAAAAAAAAAAAAAAAAAAAAAAAAAEAAAABAAAAAAAAAC5AAAABAAAABCepkig" +"n7sVkKCGKqChmveQy4kaoNIj9HDSYSYQ1v50INiArZDa/tGg28CQENzes6DdqayQ3r6Vo" +"N+JjpDgnneg4WlwkOJ+WaDjSVKQ5F47oOUpNJDmR1gg5xJREOgnOiDo8jMQ6gccIOrSFR" +"Dr5v4g7LH3EO3G4CDukdkQ76/8oPBxuxDxj96g8n/BkPNvwKD0X6OQ9U+ioPY/hZD3L4S" +"g+CiiEPkPZqD6CIQQ+viDIPvoZhD82GUg/chIEP64RyD/qCoQAJgpIAGIDBACeAsgA3Eo" +"kARhJ6AFUQqQBkEJoAcw7JAHjUOgCRDOkAmtvyAK8LCQC+CvoAzZzRANwJGgDrmvEA+pr" +"iAQmZEQEYmQIBJ5cxATaXIgFFlVEBVJVCAWOTcQFyk2IBgiU5AZCRggGgI1kBryNKAb4h" +"eQHNIWoB3B+ZAesfigH6HbkCB2KyAhgb2QIlYNICNq2hAkNe8gJUq8ECYV0SAnKp4QJ/7" +"toCkKgBAp3s+gKupiECu+saAs036QLZ6ToC6zYJAvfnWgMJNCkDFnkiAycySQM0d0IDRT" +"BpA1J1YgNjLokDcHOCA4HAUQOOcaIDn75xA6xvwgO9vJEDywGKA9u6sQPo/6oD+bjRBAb" +"9ygQYSpkEJPvqBDZIuQRC+goEVEbZBF89MgRy2KEEfTtSBJDWwQSbOXIErtThBLnLOgTN" +"ZqkE18laBOtkyQT1x3oFCWLpBRPFmgUnYQkFMcO6BUVfKQVPwdoFY11JBW5TogWB7xEFj" +"FHCBZ/tMQWqT+IFvetRBchOAgXb6XEF5kwiBfnnkQYE3eoGGHlZBiLcCgY2d3kGQNoqBl" +"R1mQZe2EoGcnO5BnzWagaQcdkGmtSKBq5v+Qa5ZlIGzQHBBtdkcgbq/+EG9WKSBwj+AQc" +"TYLIHJvwhBzFe0gdE+kEHT/CaB2OMCQdt7roHgYopB4vs2gefiEkHqer6B72GaQfH6RoH" +"24SJB+XnOgf5gqkAABAAECAwEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQA" +"BAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEA" +"AQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABA" +"AEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQABAAEAAQAB//+dkAEA//+PgA" 
+"AE//+dkAEI//+dkAEMUERUAFBTVABQV1QAUFBUAAAAAAEAAAAB"); + + static const char *LA_VER2 = + ("VFppZjIAAAAAAAAAAAAAAAAAAAAAAAAFAAAABQAAAAAAAAC6AAAABQAAABSAAAAAnqZIoJ" +"+7FZCghiqgoZr3kMuJGqDSI/Rw0mEmENb+dCDYgK2Q2v7RoNvAkBDc3rOg3amskN6+laD" +"fiY6Q4J53oOFpcJDiflmg40lSkOReO6DlKTSQ5kdYIOcSURDoJzog6PIzEOoHHCDq0hUQ" +"6+b+IOyx9xDtxuAg7pHZEO+v/KDwcbsQ8Y/eoPJ/wZDzb8Cg9F+jkPVPoqD2P4WQ9y+Eo" +"PgoohD5D2ag+giEEPr4gyD76GYQ/NhlIP3ISBD+uEcg/6gqEACYKSABiAwQAngLIANxKJ" +"AEYSegBVEKkAZBCaAHMOyQB41DoAkQzpAJrb8gCvCwkAvgr6AM2c0QDcCRoA65rxAPqa4" +"gEJmREBGJkCASeXMQE2lyIBRZVRAVSVQgFjk3EBcpNiAYIlOQGQkYIBoCNZAa8jSgG+IX" +"kBzSFqAdwfmQHrH4oB+h25AgdisgIYG9kCJWDSAjatoQJDXvICVKvBAmFdEgJyqeECf+7" +"aApCoAQKd7PoCrqYhArvrGgLNN+kC2ek6Aus2CQL351oDCTQpAxZ5IgMnMkkDNHdCA0Uw" +"aQNSdWIDYy6JA3BzggOBwFEDjnGiA5++cQOsb8IDvbyRA8sBigPburED6P+qA/m40QQG/" +"coEGEqZBCT76gQ2SLkEQvoKBFRG2QRfPTIEctihBH07UgSQ1sEEmzlyBK7U4QS5yzoEzW" +"apBNfJWgTrZMkE9cd6BQli6QUTxZoFJ2EJBTHDugVFXykFT8HaBWNdSQVuU6IFge8RBYx" +"RwgWf7TEFqk/iBb3rUQXITgIF2+lxBeZMIgX555EGBN3qBhh5WQYi3AoGNnd5BkDaKgZU" +"dZkGXthKBnJzuQZ81moGkHHZBprUigaub/kGuWZSBs0BwQbXZHIG6v/hBvVikgcI/gEHE" +"2CyByb8IQcxXtIHRPpBB0/wmgdjjAkHbe66B4GKKQeL7NoHn4hJB6nq+ge9hmkHx+kaB9" +"uEiQfl5zoH+YKpACAQIBAgMEAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECA" +"QIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAg" +"ECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQI" +"BAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQL//5EmAAD//52Q" +"AQT//4+AAAj//52QAQz//52QARBMTVQAUERUAFBTVABQV1QAUFBUAAAAAAABAAAAAAFUW" +"mlmMgAAAAAAAAAAAAAAAAAAAAAAAAUAAAAFAAAAAAAAALsAAAAFAAAAFPgAAAAAAAAA//" +"///14EGsD/////nqZIoP////+fuxWQ/////6CGKqD/////oZr3kP/////LiRqg/////9I" +"j9HD/////0mEmEP/////W/nQg/////9iArZD/////2v7RoP/////bwJAQ/////9zes6D/" +"////3amskP/////evpWg/////9+JjpD/////4J53oP/////haXCQ/////+J+WaD/////4" +"0lSkP/////kXjug/////+UpNJD/////5kdYIP/////nElEQ/////+gnOiD/////6PIzEP" +"/////qBxwg/////+rSFRD/////6+b+IP/////ssfcQ/////+3G4CD/////7pHZEP/////" 
+"vr/yg//////BxuxD/////8Y/eoP/////yf8GQ//////NvwKD/////9F+jkP/////1T6Kg" +"//////Y/hZD/////9y+EoP/////4KKIQ//////kPZqD/////+giEEP/////6+IMg/////" +"/voZhD//////NhlIP/////9yEgQ//////64RyD//////6gqEAAAAAAAmCkgAAAAAAGIDB" +"AAAAAAAngLIAAAAAADcSiQAAAAAARhJ6AAAAAABVEKkAAAAAAGQQmgAAAAAAcw7JAAAAA" +"AB41DoAAAAAAJEM6QAAAAAAmtvyAAAAAACvCwkAAAAAAL4K+gAAAAAAzZzRAAAAAADcCR" +"oAAAAAAOua8QAAAAAA+priAAAAAAEJmREAAAAAARiZAgAAAAABJ5cxAAAAAAE2lyIAAAA" +"AAUWVUQAAAAABVJVCAAAAAAFjk3EAAAAAAXKTYgAAAAABgiU5AAAAAAGQkYIAAAAAAaAj" +"WQAAAAABryNKAAAAAAG+IXkAAAAAAc0hagAAAAAB3B+ZAAAAAAHrH4oAAAAAAfoduQAAA" +"AACB2KyAAAAAAIYG9kAAAAAAiVg0gAAAAACNq2hAAAAAAJDXvIAAAAAAlSrwQAAAAACYV" +"0SAAAAAAJyqeEAAAAAAn/u2gAAAAACkKgBAAAAAAKd7PoAAAAAAq6mIQAAAAACu+saAAA" +"AAALNN+kAAAAAAtnpOgAAAAAC6zYJAAAAAAL351oAAAAAAwk0KQAAAAADFnkiAAAAAAMn" +"MkkAAAAAAzR3QgAAAAADRTBpAAAAAANSdWIAAAAAA2MuiQAAAAADcHOCAAAAAAOBwFEAA" +"AAAA45xogAAAAADn75xAAAAAAOsb8IAAAAAA728kQAAAAADywGKAAAAAAPburEAAAAAA+" +"j/qgAAAAAD+bjRAAAAAAQG/coAAAAABBhKmQAAAAAEJPvqAAAAAAQ2SLkAAAAABEL6CgA" +"AAAAEVEbZAAAAAARfPTIAAAAABHLYoQAAAAAEfTtSAAAAAASQ1sEAAAAABJs5cgAAAAAE" +"rtThAAAAAAS5yzoAAAAABM1mqQAAAAAE18laAAAAAATrZMkAAAAABPXHegAAAAAFCWLpA" +"AAAAAUTxZoAAAAABSdhCQAAAAAFMcO6AAAAAAVFXykAAAAABU/B2gAAAAAFY11JAAAAAA" +"VuU6IAAAAABYHvEQAAAAAFjFHCAAAAAAWf7TEAAAAABapP4gAAAAAFvetRAAAAAAXITgI" +"AAAAABdvpcQAAAAAF5kwiAAAAAAX555EAAAAABgTd6gAAAAAGGHlZAAAAAAYi3AoAAAAA" +"BjZ3eQAAAAAGQNoqAAAAAAZUdZkAAAAABl7YSgAAAAAGcnO5AAAAAAZ81moAAAAABpBx2" +"QAAAAAGmtSKAAAAAAaub/kAAAAABrlmUgAAAAAGzQHBAAAAAAbXZHIAAAAABur/4QAAAA" +"AG9WKSAAAAAAcI/gEAAAAABxNgsgAAAAAHJvwhAAAAAAcxXtIAAAAAB0T6QQAAAAAHT/C" +"aAAAAAAdjjAkAAAAAB23uugAAAAAHgYopAAAAAAeL7NoAAAAAB5+ISQAAAAAHqer6AAAA" +"AAe9hmkAAAAAB8fpGgAAAAAH24SJAAAAAAfl5zoAAAAAB/mCqQAAIBAgECAwQCAQIBAgE" +"CAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIB" +"AgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECA" +"QIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAgECAQIBAg" 
+"ECAQIBAgECAQIBAgECAQIBAv//kSYAAP//nZABBP//j4AACP//nZABDP//nZABEExNVAB" +"QRFQAUFNUAFBXVABQUFQAAAAAAAEAAAAAAQpQU1Q4UERULE0zLjIuMCxNMTEuMS4wCg=="); + + const std::string& getVariantFromZone(const Timezone& zone, + const std::string& date) { + tm timeStruct; + if (strptime(date.c_str(), "%F %H:%M:%S", &timeStruct) == nullptr) { + throw TimezoneError("bad time " + date); + } + return zone.getVariant(timegm(&timeStruct)).name; + } + + TEST(TestTimezone, testParser) { + std::unique_ptr<Timezone> la = getTimezone("America/Los_Angeles", + decodeBase64(LA_VER1)); + EXPECT_EQ(1, la->getVersion()); + EXPECT_EQ("PST", getVariantFromZone(*la, "1974-01-06 09:59:59")); + EXPECT_EQ("PDT", getVariantFromZone(*la, "1974-01-06 10:00:00")); + // v1 won't have information past 2038 + EXPECT_EQ("PST", getVariantFromZone(*la, "2100-03-14 09:59:59")); + EXPECT_EQ("PST", getVariantFromZone(*la, "2100-03-14 10:00:00")); + + la = getTimezone("America/Los_Angeles", decodeBase64(LA_VER2)); + EXPECT_EQ(2, la->getVersion()); + EXPECT_EQ("PST", getVariantFromZone(*la, "1974-01-06 09:59:59")); + EXPECT_EQ("PDT", getVariantFromZone(*la, "1974-01-06 10:00:00")); + EXPECT_EQ("PST", getVariantFromZone(*la, "2100-03-14 09:59:59")); + EXPECT_EQ("PDT", getVariantFromZone(*la, "2100-03-14 10:00:00")); + } + + TEST(TestTimezone, testZoneCache) { + const Timezone *la1 = &getTimezoneByName("America/Los_Angeles"); + const Timezone *ny1 = &getTimezoneByName("America/New_York"); + const Timezone *la2 = &getTimezoneByName("America/Los_Angeles"); + const Timezone *ny2 = &getTimezoneByName("America/New_York"); + EXPECT_EQ(la1, la2); + EXPECT_EQ(ny1, ny2); + EXPECT_EQ("PST", getVariantFromZone(*la1, "1974-01-06 09:59:59")); + EXPECT_EQ("PDT", getVariantFromZone(*la1, "1974-01-06 10:00:00")); + EXPECT_EQ("PDT", getVariantFromZone(*la1, "1974-10-27 08:59:59")); + EXPECT_EQ("PST", getVariantFromZone(*la1, "1974-10-27 09:00:00")); + EXPECT_EQ("EST", getVariantFromZone(*ny1, "1974-01-06 06:59:59")); + EXPECT_EQ("EDT", 
getVariantFromZone(*ny1, "1974-01-06 07:00:00")); + EXPECT_EQ("EDT", getVariantFromZone(*ny1, "1974-10-27 05:59:59")); + EXPECT_EQ("EST", getVariantFromZone(*ny1, "1974-10-27 06:00:00")); + } +} // namespace orc http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/docker/run-all.sh ---------------------------------------------------------------------- diff --git a/docker/run-all.sh b/docker/run-all.sh index 0a34b9c..d124b3c 100755 --- a/docker/run-all.sh +++ b/docker/run-all.sh @@ -15,11 +15,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +REPOSITORY="https://github.com/apache/orc.git" +BRANCH="master" + start=`date` for os in centos5 centos6 centos7 debian6 debian7 ubuntu12 ubuntu14; do echo "Testing $os" ( cd $os && docker build -t "orc-$os" . ) - docker run "orc-$os" || exit 1 + docker run "orc-$os" /bin/bash -c "git clone $REPOSITORY -b $BRANCH && mkdir orc/build && cd orc/build && cmake .. && make package test-out" || exit 1 done echo "Start: $start" -echo "End:" `date` \ No newline at end of file +echo "End:" `date` http://git-wip-us.apache.org/repos/asf/orc/blob/6dce746c/tools/src/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/tools/src/CMakeLists.txt b/tools/src/CMakeLists.txt index 69cadd8..33f294e 100644 --- a/tools/src/CMakeLists.txt +++ b/tools/src/CMakeLists.txt @@ -18,7 +18,7 @@ include_directories ( ${PROTOBUF_INCLUDE_DIRS} ) -set (CMAKE_CXX_FLAGS "-O0 ${CMAKE_CXX_FLAGS} -g ${CXX11_FLAGS} ${WARN_FLAGS}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ${CXX11_FLAGS} ${WARN_FLAGS}") add_executable (file-contents FileContents.cc
