orc git commit: ORC-306 Correct pre-1970 timestamps that were off by one second.

2018-03-20 Thread omalley
Repository: orc
Updated Branches:
  refs/heads/branch-1.4 084ddbc78 -> 6c4865ad9


ORC-306 Correct pre-1970 timestamps that were off by one second.

Fixes #220

Signed-off-by: Owen O'Malley 


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/6c4865ad
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/6c4865ad
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/6c4865ad

Branch: refs/heads/branch-1.4
Commit: 6c4865ad9acb75c35d97206f31b4dd9e0a3a7cb4
Parents: 084ddbc
Author: Owen O'Malley 
Authored: Mon Feb 26 15:27:52 2018 -0800
Committer: Owen O'Malley 
Committed: Tue Mar 20 15:26:06 2018 -0700

--
 .../org/apache/orc/impl/TreeReaderFactory.java  | 12 ++--
 .../java/org/apache/orc/impl/WriterImpl.java| 31 ++
 .../test/org/apache/orc/TestVectorOrcFile.java  | 59 +++-
 3 files changed, 60 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/6c4865ad/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
--
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 9649be9..08a4359 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -19,6 +19,7 @@ package org.apache.orc.impl;
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.sql.Timestamp;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
@@ -48,7 +49,6 @@ import org.apache.orc.OrcProto;
  * Factory for creating ORC tree readers.
  */
 public class TreeReaderFactory {
-
   public interface Context {
 SchemaEvolution getSchemaEvolution();
 
@@ -975,12 +975,12 @@ public class TreeReaderFactory {
 
   for (int i = 0; i < batchSize; i++) {
 if (result.noNulls || !result.isNull[i]) {
-  long millis = data.next() + base_timestamp;
-  int newNanos = parseNanos(nanos.next());
-  if (millis < 0 && newNanos != 0) {
-millis -= 1;
+  final int newNanos = parseNanos(nanos.next());
+  long millis = (data.next() + base_timestamp)
+  * WriterImpl.MILLIS_PER_SECOND + newNanos / 1_000_000;
+  if (millis < 0 && newNanos > 999_999) {
+millis -= WriterImpl.MILLIS_PER_SECOND;
   }
-  millis *= WriterImpl.MILLIS_PER_SECOND;
   long offset = 0;
   // If reader and writer time zones have different rules, adjust the timezone difference
   // between reader and writer taking day light savings into account.

http://git-wip-us.apache.org/repos/asf/orc/blob/6c4865ad/java/core/src/java/org/apache/orc/impl/WriterImpl.java
--
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java 
b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index 32820e1..abd398e 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -1797,11 +1797,16 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
 int length) throws IOException {
   super.writeBatch(vector, offset, length);
   TimestampColumnVector vec = (TimestampColumnVector) vector;
-  Timestamp val;
   if (vector.isRepeating) {
 if (vector.noNulls || !vector.isNull[0]) {
-  val = vec.asScratchTimestamp(0);
-  long millis = val.getTime();
+  // ignore the bottom three digits from the vec.time field
+  final long secs = vec.time[0] / MILLIS_PER_SECOND;
+  final int newNanos = vec.nanos[0];
+  // set the millis based on the top three digits of the nanos
+  long millis = secs * MILLIS_PER_SECOND + newNanos / 1_000_000;
+  if (millis < 0 && newNanos > 999_999) {
+millis -= MILLIS_PER_SECOND;
+  }
   long utc = SerializationUtils.convertToUtc(localTimezone, millis);
   indexStatistics.updateTimestamp(utc);
   if (createBloomFilter) {
@@ -1810,22 +1815,26 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
 }
 bloomFilterUtf8.addLong(utc);
   }
-  final long secs = millis / MILLIS_PER_SECOND - baseEpochSecsLocalTz;
-  final long nano = formatNanos(val.getNanos());
+ final long nano = formatNanos(vec.nanos[0]);
   for(int i=0; i < length; ++i) {
-seconds.write(secs);
+ seconds.write(secs - baseEpochSecsLocalTz);
 nanos.write(nano);
   }
 }
   } else {
 for(int i=0; i < 

orc git commit: ORC-306 Correct pre-1970 timestamps that were off by one second.

2018-03-02 Thread omalley
Repository: orc
Updated Branches:
  refs/heads/master 51b6b6ce3 -> 9c105b92a


ORC-306 Correct pre-1970 timestamps that were off by one second.

Fixes #220

Signed-off-by: Owen O'Malley 


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/9c105b92
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/9c105b92
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/9c105b92

Branch: refs/heads/master
Commit: 9c105b92a0c2ab9c624b7bffd3c8b3a91d892175
Parents: 51b6b6c
Author: Owen O'Malley 
Authored: Mon Feb 26 15:27:52 2018 -0800
Committer: Owen O'Malley 
Committed: Fri Mar 2 10:49:30 2018 -0800

--
 .../org/apache/orc/impl/TreeReaderFactory.java  | 12 ++--
 .../orc/impl/writer/TimestampTreeWriter.java| 31 ++
 .../test/org/apache/orc/TestVectorOrcFile.java  | 59 +++-
 3 files changed, 60 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/orc/blob/9c105b92/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
--
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 1891737..b33ad11 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -19,6 +19,7 @@ package org.apache.orc.impl;
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.sql.Timestamp;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
@@ -49,7 +50,6 @@ import org.apache.orc.impl.writer.TimestampTreeWriter;
  * Factory for creating ORC tree readers.
  */
 public class TreeReaderFactory {
-
   public interface Context {
 SchemaEvolution getSchemaEvolution();
 
@@ -977,12 +977,12 @@ public class TreeReaderFactory {
 
   for (int i = 0; i < batchSize; i++) {
 if (result.noNulls || !result.isNull[i]) {
-  long millis = data.next() + base_timestamp;
-  int newNanos = parseNanos(nanos.next());
-  if (millis < 0 && newNanos != 0) {
-millis -= 1;
+  final int newNanos = parseNanos(nanos.next());
+  long millis = (data.next() + base_timestamp)
+  * TimestampTreeWriter.MILLIS_PER_SECOND + newNanos / 1_000_000;
+  if (millis < 0 && newNanos > 999_999) {
+millis -= TimestampTreeWriter.MILLIS_PER_SECOND;
   }
-  millis *= TimestampTreeWriter.MILLIS_PER_SECOND;
   long offset = 0;
   // If reader and writer time zones have different rules, adjust the timezone difference
   // between reader and writer taking day light savings into account.

http://git-wip-us.apache.org/repos/asf/orc/blob/9c105b92/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
--
diff --git a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
index fae108e..1694ca1 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TimestampTreeWriter.java
@@ -75,11 +75,16 @@ public class TimestampTreeWriter extends TreeWriterBase {
  int length) throws IOException {
 super.writeBatch(vector, offset, length);
 TimestampColumnVector vec = (TimestampColumnVector) vector;
-Timestamp val;
 if (vector.isRepeating) {
   if (vector.noNulls || !vector.isNull[0]) {
-val = vec.asScratchTimestamp(0);
-long millis = val.getTime();
+// ignore the bottom three digits from the vec.time field
+final long secs = vec.time[0] / MILLIS_PER_SECOND;
+final int newNanos = vec.nanos[0];
+// set the millis based on the top three digits of the nanos
+long millis = secs * MILLIS_PER_SECOND + newNanos / 1_000_000;
+if (millis < 0 && newNanos > 999_999) {
+  millis -= MILLIS_PER_SECOND;
+}
 long utc = SerializationUtils.convertToUtc(localTimezone, millis);
 indexStatistics.updateTimestamp(utc);
 if (createBloomFilter) {
@@ -88,22 +93,26 @@ public class TimestampTreeWriter extends TreeWriterBase {
   }
   bloomFilterUtf8.addLong(utc);
 }
-final long secs = millis / MILLIS_PER_SECOND - baseEpochSecsLocalTz;
-final long nano = formatNanos(val.getNanos());
+final long nano = formatNanos(vec.nanos[0]);
 for (int i = 0; i < length; ++i) {
-  seconds.write(secs);
+  seconds.write(secs - baseEpochSecsLocalTz);
   nanos.write(nano);
 }
   }