[ https://issues.apache.org/jira/browse/ORC-87?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15991495#comment-15991495 ]
ASF GitHub Bot commented on ORC-87: ----------------------------------- Github user omalley commented on a diff in the pull request: https://github.com/apache/orc/pull/110#discussion_r114196797 --- Diff: c++/src/Statistics.cc --- @@ -279,20 +277,55 @@ namespace orc { } TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl - (const proto::ColumnStatistics& pb, bool correctStats) { + (const proto::ColumnStatistics& pb, const StatContext& statContext) { valueCount = pb.numberofvalues(); - if (!pb.has_timestampstatistics() || !correctStats) { + if (!pb.has_timestampstatistics() || !statContext.correctStats) { _hasMinimum = false; _hasMaximum = false; + _hasLowerBound = false; + _hasUpperBound = false; minimum = 0; maximum = 0; + lowerBound = 0; + upperBound = 0; }else{ const proto::TimestampStatistics& stats = pb.timestampstatistics(); - _hasMinimum = stats.has_minimum(); - _hasMaximum = stats.has_maximum(); - - minimum = stats.minimum(); - maximum = stats.maximum(); + _hasMinimum = stats.has_minimumutc() || (stats.has_minimum() && (statContext.writerTimezone != NULL)); + _hasMaximum = stats.has_maximumutc() || (stats.has_maximum() && (statContext.writerTimezone != NULL)); + _hasLowerBound = stats.has_minimumutc() || stats.has_minimum(); + _hasUpperBound = stats.has_maximumutc() || stats.has_maximum(); + + // Timestamp stats are stored in milliseconds + if (stats.has_minimumutc()) { + minimum = stats.minimumutc(); + lowerBound = minimum; + } else if (statContext.writerTimezone) { + int64_t writerTimeSec = stats.minimum() / 1000; + // multiply the offset by 1000 to convert to millisecond + minimum = stats.minimum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000; + lowerBound = minimum; + } else { + minimum = 0; + // subtract 1 day to handle unknown TZ + lowerBound = stats.minimum() - (24 * 60 * 60 * 1000); + } + + // Timestamp stats are stored in milliseconds + if (stats.has_maximumutc()) { + maximum = stats.maximumutc(); + upperBound 
= maximum; + } else if (statContext.writerTimezone) { + int64_t writerTimeSec = stats.maximum() / 1000; + // multiply the offset by 1000 to convert to millisecond + maximum = stats.maximum() + (statContext.writerTimezone->getVariant(writerTimeSec).gmtOffset) * 1000; + upperBound = maximum; + } else { + maximum = 0; + // add 1 day to handle unknown TZ + upperBound = stats.maximum() + (SECONDS_PER_DAY * 1000); + } + // Add 1 millisecond to account for microsecond precision of values + maximum += 1;
--- End diff --

I don't understand this and the unit tests pass without it.

> [C++] Handle missing timezone conversion for timestamp statistics
> -----------------------------------------------------------------
>
> Key: ORC-87
> URL: https://issues.apache.org/jira/browse/ORC-87
> Project: ORC
> Issue Type: Bug
> Components: C++
> Reporter: Deepak Majeti
> Assignee: Deepak Majeti
>
> The recent release adds timezone to timestamp values but does not add the
> conversion for timestamp statistics.

--
This message was sent by Atlassian JIRA (v6.3.15#6346)