orc git commit: ORC-306 Correct pre-1970 timestamps that were off by one second.
Repository: orc
Updated Branches:
  refs/heads/branch-1.4 084ddbc78 -> 6c4865ad9

ORC-306 Correct pre-1970 timestamps that were off by one second.

Fixes #220

Signed-off-by: Owen O'Malley

Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/6c4865ad
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/6c4865ad
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/6c4865ad

Branch: refs/heads/branch-1.4
Commit: 6c4865ad9acb75c35d97206f31b4dd9e0a3a7cb4
Parents: 084ddbc
Author: Owen O'Malley
Authored: Mon Feb 26 15:27:52 2018 -0800
Committer: Owen O'Malley
Committed: Tue Mar 20 15:26:06 2018 -0700

--
 .../org/apache/orc/impl/TreeReaderFactory.java | 12 ++--
 .../java/org/apache/orc/impl/WriterImpl.java| 31 ++
 .../test/org/apache/orc/TestVectorOrcFile.java | 59 +++-
 3 files changed, 60 insertions(+), 42 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/orc/blob/6c4865ad/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
--
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 9649be9..08a4359 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -19,6 +19,7 @@ package org.apache.orc.impl;
 import java.io.EOFException;
 import java.io.IOException;
+import java.sql.Timestamp;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
@@ -48,7 +49,6 @@ import org.apache.orc.OrcProto;
  * Factory for creating ORC tree readers.
*/ public class TreeReaderFactory { - public interface Context { SchemaEvolution getSchemaEvolution(); @@ -975,12 +975,12 @@ public class TreeReaderFactory { for (int i = 0; i < batchSize; i++) { if (result.noNulls || !result.isNull[i]) { - long millis = data.next() + base_timestamp; - int newNanos = parseNanos(nanos.next()); - if (millis < 0 && newNanos != 0) { -millis -= 1; + final int newNanos = parseNanos(nanos.next()); + long millis = (data.next() + base_timestamp) + * WriterImpl.MILLIS_PER_SECOND + newNanos / 1_000_000; + if (millis < 0 && newNanos > 999_999) { +millis -= WriterImpl.MILLIS_PER_SECOND; } - millis *= WriterImpl.MILLIS_PER_SECOND; long offset = 0; // If reader and writer time zones have different rules, adjust the timezone difference // between reader and writer taking day light savings into account. http://git-wip-us.apache.org/repos/asf/orc/blob/6c4865ad/java/core/src/java/org/apache/orc/impl/WriterImpl.java -- diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java index 32820e1..abd398e 100644 --- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java +++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java @@ -1797,11 +1797,16 @@ public class WriterImpl implements Writer, MemoryManager.Callback { int length) throws IOException { super.writeBatch(vector, offset, length); TimestampColumnVector vec = (TimestampColumnVector) vector; - Timestamp val; if (vector.isRepeating) { if (vector.noNulls || !vector.isNull[0]) { - val = vec.asScratchTimestamp(0); - long millis = val.getTime(); + // ignore the bottom three digits from the vec.time field + final long secs = vec.time[0] / MILLIS_PER_SECOND; + final int newNanos = vec.nanos[0]; + // set the millis based on the top three digits of the nanos + long millis = secs * MILLIS_PER_SECOND + newNanos / 1_000_000; + if (millis < 0 && newNanos > 999_999) { +millis -= MILLIS_PER_SECOND; + } long utc = 
SerializationUtils.convertToUtc(localTimezone, millis); indexStatistics.updateTimestamp(utc); if (createBloomFilter) { @@ -1810,22 +1815,26 @@ public class WriterImpl implements Writer, MemoryManager.Callback { } bloomFilterUtf8.addLong(utc); } - final long secs = millis / MILLIS_PER_SECOND - baseEpochSecsLocalTz; - final long nano = formatNanos(val.getNanos()); + final long nano = formatNanos(vec.nanos[0]); for(int i=0; i < length; ++i) { -seconds.write(secs); + seconds.write(secs - baseEpochSecsLocalTz); nanos.write(nano); } } } else { for(int i=0; i <
orc git commit: ORC-306 Correct pre-1970 timestamps that were off by one second.
Repository: orc
Updated Branches:
  refs/heads/master 51b6b6ce3 -> 9c105b92a

ORC-306 Correct pre-1970 timestamps that were off by one second.

Fixes #220

Signed-off-by: Owen O'Malley

Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/9c105b92
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/9c105b92
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/9c105b92

Branch: refs/heads/master
Commit: 9c105b92a0c2ab9c624b7bffd3c8b3a91d892175
Parents: 51b6b6c
Author: Owen O'Malley
Authored: Mon Feb 26 15:27:52 2018 -0800
Committer: Owen O'Malley
Committed: Fri Mar 2 10:49:30 2018 -0800

--
 .../org/apache/orc/impl/TreeReaderFactory.java | 12 ++--
 .../orc/impl/writer/TimestampTreeWriter.java| 31 ++
 .../test/org/apache/orc/TestVectorOrcFile.java | 59 +++-
 3 files changed, 60 insertions(+), 42 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/orc/blob/9c105b92/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
--
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 1891737..b33ad11 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -19,6 +19,7 @@ package org.apache.orc.impl;
 import java.io.EOFException;
 import java.io.IOException;
+import java.sql.Timestamp;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
@@ -49,7 +50,6 @@ import org.apache.orc.impl.writer.TimestampTreeWriter;
  * Factory for creating ORC tree readers.
*/ public class TreeReaderFactory { - public interface Context { SchemaEvolution getSchemaEvolution(); @@ -977,12 +977,12 @@ public class TreeReaderFactory { for (int i = 0; i < batchSize; i++) { if (result.noNulls || !result.isNull[i]) { - long millis = data.next() + base_timestamp; - int newNanos = parseNanos(nanos.next()); - if (millis < 0 && newNanos != 0) { -millis -= 1; + final int newNanos = parseNanos(nanos.next()); + long millis = (data.next() + base_timestamp) + * TimestampTreeWriter.MILLIS_PER_SECOND + newNanos / 1_000_000; + if (millis < 0 && newNanos > 999_999) { +millis -= TimestampTreeWriter.MILLIS_PER_SECOND; } - millis *= TimestampTreeWriter.MILLIS_PER_SECOND; long offset = 0; // If reader and writer time zones have different rules, adjust the timezone difference // between reader and writer taking day light savings into account. http://git-wip-us.apache.org/repos/asf/orc/blob/9c105b92/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java -- diff --git a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java index fae108e..1694ca1 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java +++ b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java @@ -75,11 +75,16 @@ public class TimestampTreeWriter extends TreeWriterBase { int length) throws IOException { super.writeBatch(vector, offset, length); TimestampColumnVector vec = (TimestampColumnVector) vector; -Timestamp val; if (vector.isRepeating) { if (vector.noNulls || !vector.isNull[0]) { -val = vec.asScratchTimestamp(0); -long millis = val.getTime(); +// ignore the bottom three digits from the vec.time field +final long secs = vec.time[0] / MILLIS_PER_SECOND; +final int newNanos = vec.nanos[0]; +// set the millis based on the top three digits of the nanos +long millis = secs * MILLIS_PER_SECOND + newNanos / 1_000_000; +if (millis < 0 && newNanos > 
999_999) { + millis -= MILLIS_PER_SECOND; +} long utc = SerializationUtils.convertToUtc(localTimezone, millis); indexStatistics.updateTimestamp(utc); if (createBloomFilter) { @@ -88,22 +93,26 @@ public class TimestampTreeWriter extends TreeWriterBase { } bloomFilterUtf8.addLong(utc); } -final long secs = millis / MILLIS_PER_SECOND - baseEpochSecsLocalTz; -final long nano = formatNanos(val.getNanos()); +final long nano = formatNanos(vec.nanos[0]); for (int i = 0; i < length; ++i) { - seconds.write(secs); + seconds.write(secs - baseEpochSecsLocalTz); nanos.write(nano); } }