This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 15030ef810808d62b3d21883fc76ad2beb14deb0
Author: Owen O'Malley <[email protected]>
AuthorDate: Fri Aug 21 16:51:08 2020 -0700

    ORC-661: Use ChronoLocalDate and day of epoch instead of Java's Date for 
column statistics.
    
    Fixes #540
    
    Signed-off-by: Owen O'Malley <[email protected]>
---
 .../java/org/apache/orc/DateColumnStatistics.java  |  29 ++++++
 .../org/apache/orc/impl/ColumnStatisticsImpl.java  | 114 ++++++++++++---------
 .../src/java/org/apache/orc/impl/DateUtils.java    |   6 +-
 .../java/org/apache/orc/impl/RecordReaderImpl.java |  37 +++++--
 .../org/apache/orc/impl/writer/TreeWriterBase.java |   7 +-
 .../test/org/apache/orc/TestColumnStatistics.java  |  28 +++++
 .../org/apache/orc/TestProlepticConversions.java   |  27 +++--
 .../org/apache/orc/impl/TestRecordReaderImpl.java  |  59 ++++++-----
 8 files changed, 209 insertions(+), 98 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/DateColumnStatistics.java 
b/java/core/src/java/org/apache/orc/DateColumnStatistics.java
index 59efc16..7d810b1 100644
--- a/java/core/src/java/org/apache/orc/DateColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/DateColumnStatistics.java
@@ -17,21 +17,50 @@
  */
 package org.apache.orc;
 
+import java.time.LocalDate;
+import java.time.chrono.ChronoLocalDate;
 import java.util.Date;
 
 /**
  * Statistics for DATE columns.
  */
 public interface DateColumnStatistics extends ColumnStatistics {
+
+  /**
+   * Get the minimum value for the column.
+   * @return minimum value as a ChronoLocalDate
+   */
+  ChronoLocalDate getMinimumLocalDate();
+
+  /**
+   * Get the minimum value for the column.
+   * @return minimum value as days since epoch (1 Jan 1970)
+   */
+  long getMinimumDayOfEpoch();
+
+  /**
+   * Get the maximum value for the column.
+   * @return maximum value as a ChronoLocalDate
+   */
+  ChronoLocalDate getMaximumLocalDate();
+
+  /**
+   * Get the maximum value for the column.
+   * @return maximum value as days since epoch (1 Jan 1970)
+   */
+  long getMaximumDayOfEpoch();
+
   /**
    * Get the minimum value for the column.
    * @return minimum value
+   * @deprecated Use #getMinimumLocalDate instead
    */
   Date getMinimum();
 
   /**
    * Get the maximum value for the column.
    * @return maximum value
+   * @deprecated Use #getMaximumLocalDate instead
    */
   Date getMaximum();
 }
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java 
b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index 263c3bb..587e33b 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -17,6 +17,10 @@
  */
 package org.apache.orc.impl;
 
+import java.time.LocalDate;
+import java.time.chrono.ChronoLocalDate;
+import java.time.chrono.Chronology;
+import java.time.chrono.IsoChronology;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -39,6 +43,8 @@ import org.apache.orc.TypeDescription;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.TimeZone;
+import org.threeten.extra.chrono.HybridChronology;
+
 
 public class ColumnStatisticsImpl implements ColumnStatistics {
 
@@ -1473,16 +1479,23 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
 
   private static final class DateStatisticsImpl extends ColumnStatisticsImpl
       implements DateColumnStatistics {
-    private Integer minimum = null;
-    private Integer maximum = null;
+    private int minimum = Integer.MAX_VALUE;
+    private int maximum = Integer.MIN_VALUE;
+    private final Chronology chronology;
+
+    static Chronology getInstance(boolean proleptic) {
+      return proleptic ? IsoChronology.INSTANCE : HybridChronology.INSTANCE;
+    }
 
-    DateStatisticsImpl() {
+    DateStatisticsImpl(boolean convertToProleptic) {
+      this.chronology = getInstance(convertToProleptic);
     }
 
     DateStatisticsImpl(OrcProto.ColumnStatistics stats,
                        boolean writerUsedProlepticGregorian,
                        boolean convertToProlepticGregorian) {
       super(stats);
+      this.chronology = getInstance(convertToProlepticGregorian);
       OrcProto.DateStatistics dateStats = stats.getDateStatistics();
       // min,max values serialized/deserialized as int (days since epoch)
       if (dateStats.hasMaximum()) {
@@ -1498,30 +1511,26 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
     @Override
     public void reset() {
       super.reset();
-      minimum = null;
-      maximum = null;
+      minimum = Integer.MAX_VALUE;
+      maximum = Integer.MIN_VALUE;
     }
 
     @Override
     public void updateDate(DateWritable value) {
-      if (minimum == null) {
+      if (minimum > value.getDays()) {
         minimum = value.getDays();
-        maximum = value.getDays();
-      } else if (minimum > value.getDays()) {
-        minimum = value.getDays();
-      } else if (maximum < value.getDays()) {
+      }
+      if (maximum < value.getDays()) {
         maximum = value.getDays();
       }
     }
 
     @Override
     public void updateDate(int value) {
-      if (minimum == null) {
-        minimum = value;
-        maximum = value;
-      } else if (minimum > value) {
+      if (minimum > value) {
         minimum = value;
-      } else if (maximum < value) {
+      }
+      if (maximum < value) {
         maximum = value;
       }
     }
@@ -1530,19 +1539,10 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
     public void merge(ColumnStatisticsImpl other) {
       if (other instanceof DateStatisticsImpl) {
         DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
-        if (minimum == null) {
-          minimum = dateStats.minimum;
-          maximum = dateStats.maximum;
-        } else if (dateStats.minimum != null) {
-          if (minimum > dateStats.minimum) {
-            minimum = dateStats.minimum;
-          }
-          if (maximum < dateStats.maximum) {
-            maximum = dateStats.maximum;
-          }
-        }
+        minimum = Math.min(minimum, dateStats.minimum);
+        maximum = Math.max(maximum, dateStats.maximum);
       } else {
-        if (isStatsExists() && minimum != null) {
+        if (isStatsExists() && count != 0) {
           throw new IllegalArgumentException("Incompatible merging of date 
column statistics");
         }
       }
@@ -1554,7 +1554,7 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
       OrcProto.ColumnStatistics.Builder result = super.serialize();
       OrcProto.DateStatistics.Builder dateStats =
           OrcProto.DateStatistics.newBuilder();
-      if (getNumberOfValues() != 0 && minimum != null) {
+      if (count != 0) {
         dateStats.setMinimum(minimum);
         dateStats.setMaximum(maximum);
       }
@@ -1562,24 +1562,41 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
       return result;
     }
 
-    private transient final DateWritable minDate = new DateWritable();
-    private transient final DateWritable maxDate = new DateWritable();
+    @Override
+    public ChronoLocalDate getMinimumLocalDate() {
+      return count == 0 ? null : chronology.dateEpochDay(minimum);
+    }
+
+    @Override
+    public long getMinimumDayOfEpoch() {
+      return minimum;
+    }
+
+    @Override
+    public ChronoLocalDate getMaximumLocalDate() {
+      return count == 0 ? null : chronology.dateEpochDay(maximum);
+    }
+
+    @Override
+    public long getMaximumDayOfEpoch() {
+      return maximum;
+    }
 
     @Override
     public Date getMinimum() {
-      if (minimum == null) {
+      if (count == 0) {
         return null;
       }
-      minDate.set(minimum);
+      DateWritable minDate = new DateWritable(minimum);
       return minDate.get();
     }
 
     @Override
     public Date getMaximum() {
-      if (maximum == null) {
+      if (count == 0) {
         return null;
       }
-      maxDate.set(maximum);
+      DateWritable maxDate = new DateWritable(maximum);
       return maxDate.get();
     }
 
@@ -1588,9 +1605,9 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
       StringBuilder buf = new StringBuilder(super.toString());
       if (getNumberOfValues() != 0) {
         buf.append(" min: ");
-        buf.append(getMinimum());
+        buf.append(getMinimumLocalDate());
         buf.append(" max: ");
-        buf.append(getMaximum());
+        buf.append(getMaximumLocalDate());
       }
       return buf.toString();
     }
@@ -1609,16 +1626,10 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
 
       DateStatisticsImpl that = (DateStatisticsImpl) o;
 
-      if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != 
null) {
-        return false;
-      }
-      if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != 
null) {
-        return false;
-      }
-      if (minDate != null ? !minDate.equals(that.minDate) : that.minDate != 
null) {
+      if (minimum != that.minimum) {
         return false;
       }
-      if (maxDate != null ? !maxDate.equals(that.maxDate) : that.maxDate != 
null) {
+      if (maximum != that.maximum) {
         return false;
       }
 
@@ -1628,10 +1639,8 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
     @Override
     public int hashCode() {
       int result = super.hashCode();
-      result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
-      result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
-      result = 31 * result + (minDate != null ? minDate.hashCode() : 0);
-      result = 31 * result + (maxDate != null ? maxDate.hashCode() : 0);
+      result = 31 * result + minimum;
+      result = 31 * result + maximum;
       return result;
     }
   }
@@ -1983,6 +1992,11 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
   }
 
   public static ColumnStatisticsImpl create(TypeDescription schema) {
+    return create(schema, false);
+  }
+
+  public static ColumnStatisticsImpl create(TypeDescription schema,
+                                            boolean convertToProleptic) {
     switch (schema.getCategory()) {
       case BOOLEAN:
         return new BooleanStatisticsImpl();
@@ -2008,7 +2022,7 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
           return new DecimalStatisticsImpl();
         }
       case DATE:
-        return new DateStatisticsImpl();
+        return new DateStatisticsImpl(convertToProleptic);
       case TIMESTAMP:
         return new TimestampStatisticsImpl();
       case TIMESTAMP_INSTANT:
@@ -2022,7 +2036,7 @@ public class ColumnStatisticsImpl implements 
ColumnStatistics {
 
   public static ColumnStatisticsImpl deserialize(TypeDescription schema,
                                                  OrcProto.ColumnStatistics 
stats) {
-    return deserialize(schema, stats, false, false);
+    return deserialize(schema, stats, true, true);
   }
 
   public static ColumnStatisticsImpl deserialize(TypeDescription schema,
diff --git a/java/core/src/java/org/apache/orc/impl/DateUtils.java 
b/java/core/src/java/org/apache/orc/impl/DateUtils.java
index 44c3c7e..8660dc3 100644
--- a/java/core/src/java/org/apache/orc/impl/DateUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/DateUtils.java
@@ -97,12 +97,12 @@ public class DateUtils {
    * @return day of epoch in the hybrid Julian/Gregorian
    */
   public static int convertDateToHybrid(int proleptic) {
-    int hyrbid = proleptic;
+    int hybrid = proleptic;
     if (proleptic < SWITCHOVER_DAYS) {
       String dateStr = 
PROLEPTIC_DATE_FORMAT.format(LocalDate.ofEpochDay(proleptic));
-      hyrbid = (int) 
LocalDate.from(HYBRID_DATE_FORMAT.parse(dateStr)).toEpochDay();
+      hybrid = (int) 
LocalDate.from(HYBRID_DATE_FORMAT.parse(dateStr)).toEpochDay();
     }
-    return hyrbid;
+    return hybrid;
   }
 
   /**
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java 
b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 5851c75..9358bf4 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -17,6 +17,13 @@
  */
 package org.apache.orc.impl;
 
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.chrono.ChronoLocalDate;
+import java.time.format.DateTimeFormatter;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -25,7 +32,6 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.util.TimestampUtils;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.orc.BooleanColumnStatistics;
@@ -55,10 +61,10 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.math.BigDecimal;
-import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Date;
 import java.util.List;
 import java.util.TimeZone;
 
@@ -400,8 +406,8 @@ public class RecordReaderImpl implements RecordReader {
           stats.getMaximum() == null);
     } else if (index instanceof DateColumnStatistics) {
       DateColumnStatistics stats = (DateColumnStatistics) index;
-      java.util.Date min = stats.getMinimum();
-      java.util.Date max = stats.getMaximum();
+      ChronoLocalDate min = stats.getMinimumLocalDate();
+      ChronoLocalDate max = stats.getMaximumLocalDate();
       return new ValueRange<>(predicate, min, max, stats.hasNull());
     } else if (index instanceof DecimalColumnStatistics) {
       DecimalColumnStatistics stats = (DecimalColumnStatistics) index;
@@ -734,8 +740,8 @@ public class RecordReaderImpl implements RecordReader {
           result = TruthValue.YES_NO_NULL;
         }
       }
-    } else if (predObj instanceof Date) {
-      if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
+    } else if (predObj instanceof ChronoLocalDate) {
+      if (bf.testLong(((ChronoLocalDate) predObj).toEpochDay())) {
         result = TruthValue.YES_NO_NULL;
       }
     } else {
@@ -783,12 +789,17 @@ public class RecordReaderImpl implements RecordReader {
           return Boolean.valueOf(obj.toString());
         }
       case DATE:
-        if (obj instanceof Date) {
+        if (obj instanceof ChronoLocalDate) {
           return obj;
+        } else if (obj instanceof java.sql.Date) {
+          return ((java.sql.Date) obj).toLocalDate();
+        } else if (obj instanceof Date) {
+          return LocalDateTime.ofInstant(((Date) obj).toInstant(),
+              ZoneOffset.UTC).toLocalDate();
         } else if (obj instanceof String) {
-          return Date.valueOf((String) obj);
+          return LocalDate.parse((String) obj);
         } else if (obj instanceof Timestamp) {
-          return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L);
+          return ((Timestamp) obj).toLocalDateTime().toLocalDate();
         }
         // always string, but prevent the comparison to numbers (are they 
days/seconds/milliseconds?)
         break;
@@ -841,6 +852,11 @@ public class RecordReaderImpl implements RecordReader {
         }
         break;
       case STRING:
+        if (obj instanceof ChronoLocalDate) {
+          ChronoLocalDate date = (ChronoLocalDate) obj;
+          return date.format(DateTimeFormatter.ISO_LOCAL_DATE
+              .withChronology(date.getChronology()));
+        }
         return (obj.toString());
       case TIMESTAMP:
         if (obj instanceof Timestamp) {
@@ -857,6 +873,9 @@ public class RecordReaderImpl implements RecordReader {
           return TimestampUtils.decimalToTimestamp(((HiveDecimalWritable) 
obj).getHiveDecimal());
         } else if (obj instanceof Date) {
           return new Timestamp(((Date) obj).getTime());
+        } else if (obj instanceof ChronoLocalDate) {
+          return new Timestamp(((ChronoLocalDate) 
obj).atTime(LocalTime.MIDNIGHT)
+              .toInstant(ZoneOffset.UTC).getEpochSecond() * 1000L);
         }
         // float/double conversion to timestamp is interpreted as seconds 
whereas integer conversion
         // to timestamp is interpreted as milliseconds by default. The integer 
to timestamp casting
diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java 
b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
index b494203..ba41837 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
@@ -89,9 +89,10 @@ public abstract class TreeWriterBase implements TreeWriter {
     isPresent = new BitFieldWriter(isPresentOutStream, 1);
     this.foundNulls = false;
     createBloomFilter = context.getBloomFilterColumns()[id];
-    indexStatistics = ColumnStatisticsImpl.create(schema);
-    stripeColStatistics = ColumnStatisticsImpl.create(schema);
-    fileStatistics = ColumnStatisticsImpl.create(schema);
+    boolean proleptic = context.getProlepticGregorian();
+    indexStatistics = ColumnStatisticsImpl.create(schema, proleptic);
+    stripeColStatistics = ColumnStatisticsImpl.create(schema, proleptic);
+    fileStatistics = ColumnStatisticsImpl.create(schema, proleptic);
     if (context.buildIndex()) {
       rowIndex = OrcProto.RowIndex.newBuilder();
       rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java 
b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index ff62cca..fe2700f 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -388,8 +388,10 @@ public class TestColumnStatistics {
     ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
     stats1.updateDate(new DateWritable(1000));
     stats1.updateDate(new DateWritable(100));
+    stats1.increment(2);
     stats2.updateDate(new DateWritable(10));
     stats2.updateDate(new DateWritable(2000));
+    stats2.increment(2);
     stats1.merge(stats2);
     DateColumnStatistics typed = (DateColumnStatistics) stats1;
     assertEquals(new DateWritable(10).get(), typed.getMinimum());
@@ -397,12 +399,38 @@ public class TestColumnStatistics {
     stats1.reset();
     stats1.updateDate(new DateWritable(-10));
     stats1.updateDate(new DateWritable(10000));
+    stats1.increment(2);
     stats1.merge(stats2);
     assertEquals(new DateWritable(-10).get(), typed.getMinimum());
     assertEquals(new DateWritable(10000).get(), typed.getMaximum());
   }
 
   @Test
+  public void testLocalDateMerge() throws Exception {
+    TypeDescription schema = TypeDescription.createDate();
+
+    ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+    ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+    stats1.updateDate(1000);
+    stats1.updateDate(100);
+    stats1.increment(2);
+    stats2.updateDate(10);
+    stats2.updateDate(2000);
+    stats2.increment(2);
+    stats1.merge(stats2);
+    DateColumnStatistics typed = (DateColumnStatistics) stats1;
+    assertEquals(10, typed.getMinimumDayOfEpoch());
+    assertEquals(2000, typed.getMaximumDayOfEpoch());
+    stats1.reset();
+    stats1.updateDate(-10);
+    stats1.updateDate(10000);
+    stats1.increment(2);
+    stats1.merge(stats2);
+    assertEquals(-10, typed.getMinimumLocalDate().toEpochDay());
+    assertEquals(10000, typed.getMaximumLocalDate().toEpochDay());
+  }
+
+  @Test
   public void testTimestampMergeUTC() throws Exception {
     TypeDescription schema = TypeDescription.createTimestamp();
 
diff --git a/java/core/src/test/org/apache/orc/TestProlepticConversions.java 
b/java/core/src/test/org/apache/orc/TestProlepticConversions.java
index 95bcb71..b97a6e4 100644
--- a/java/core/src/test/org/apache/orc/TestProlepticConversions.java
+++ b/java/core/src/test/org/apache/orc/TestProlepticConversions.java
@@ -17,6 +17,9 @@
  */
 package org.apache.orc;
 
+import java.time.chrono.Chronology;
+import java.time.chrono.IsoChronology;
+import java.time.format.DateTimeFormatter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -45,6 +48,7 @@ import java.util.GregorianCalendar;
 import java.util.List;
 import java.util.TimeZone;
 import java.util.concurrent.TimeUnit;
+import org.threeten.extra.chrono.HybridChronology;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -126,11 +130,12 @@ public class TestProlepticConversions {
       t.changeCalendar(writerProlepticGregorian, false);
       i.changeCalendar(writerProlepticGregorian, false);
       GregorianCalendar cal = writerProlepticGregorian ? PROLEPTIC : HYBRID;
-      SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal);
       SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal);
+      Chronology writerChronology = writerProlepticGregorian
+          ? IsoChronology.INSTANCE : HybridChronology.INSTANCE;
       for(int r=0; r < batch.size; ++r) {
-        d.vector[r] = TimeUnit.MILLISECONDS.toDays(
-            dateFormat.parse(String.format("%04d-01-23", r * 2 + 
1)).getTime());
+        d.vector[r] = writerChronology.date(r * 2 + 1, 1, 23)
+            .toEpochDay();
         Date val = timeFormat.parse(
             String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r % 24));
         t.time[r] = val.getTime();
@@ -151,16 +156,18 @@ public class TestProlepticConversions {
       TimestampColumnVector t = (TimestampColumnVector) batch.cols[1];
       TimestampColumnVector i = (TimestampColumnVector) batch.cols[2];
       GregorianCalendar cal = readerProlepticGregorian ? PROLEPTIC : HYBRID;
-      SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal);
       SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal);
+      Chronology readerChronology = readerProlepticGregorian
+          ? IsoChronology.INSTANCE : HybridChronology.INSTANCE;
+      DateTimeFormatter dateFormat = 
DateTimeFormatter.ISO_LOCAL_DATE.withChronology(readerChronology);
 
       // Check the file statistics
       ColumnStatistics[] colStats = reader.getStatistics();
       DateColumnStatistics dStats = (DateColumnStatistics) colStats[1];
       TimestampColumnStatistics tStats = (TimestampColumnStatistics) 
colStats[2];
       TimestampColumnStatistics iStats = (TimestampColumnStatistics) 
colStats[3];
-      assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum()));
-      assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum()));
+      assertEquals("0001-01-23", 
dStats.getMinimumLocalDate().format(dateFormat));
+      assertEquals("2047-01-23", 
dStats.getMaximumLocalDate().format(dateFormat));
       assertEquals("0001-03-21 00:12:34", 
timeFormat.format(tStats.getMinimum()));
       assertEquals("2047-03-21 15:12:34", 
timeFormat.format(tStats.getMaximum()));
       assertEquals("0001-03-21 00:12:34", 
timeFormat.format(iStats.getMinimum()));
@@ -173,8 +180,8 @@ public class TestProlepticConversions {
       dStats = (DateColumnStatistics) colStats[1];
       tStats = (TimestampColumnStatistics) colStats[2];
       iStats = (TimestampColumnStatistics) colStats[3];
-      assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum()));
-      assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum()));
+      assertEquals("0001-01-23", 
dStats.getMinimumLocalDate().format(dateFormat));
+      assertEquals("2047-01-23", 
dStats.getMaximumLocalDate().format(dateFormat));
       assertEquals("0001-03-21 00:12:34", 
timeFormat.format(tStats.getMinimum()));
       assertEquals("2047-03-21 15:12:34", 
timeFormat.format(tStats.getMaximum()));
       assertEquals("0001-03-21 00:12:34", 
timeFormat.format(iStats.getMinimum()));
@@ -190,8 +197,8 @@ public class TestProlepticConversions {
       for(int r=0; r < batch.size; ++r) {
         String expectedD = String.format("%04d-01-23", r * 2 + 1);
         String expectedT = String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r 
% 24);
-        assertEquals("row " + r, expectedD, dateFormat.format(
-            new Date(TimeUnit.DAYS.toMillis(d.vector[r]))));
+        assertEquals("row " + r, expectedD, 
readerChronology.dateEpochDay(d.vector[r])
+            .format(dateFormat));
         assertEquals("row " + r, expectedT, 
timeFormat.format(t.asScratchTimestamp(r)));
         assertEquals("row " + r, expectedT, 
timeFormat.format(i.asScratchTimestamp(r)));
       }
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java 
b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index dc12715..6ca24a6 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -41,6 +41,10 @@ import java.sql.Timestamp;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.chrono.ChronoLocalDate;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -75,7 +79,6 @@ import org.apache.orc.impl.RecordReaderImpl.Location;
 import org.apache.orc.impl.RecordReaderImpl.SargApplier;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.orc.ColumnStatistics;
@@ -89,6 +92,13 @@ import org.junit.Test;
 
 public class TestRecordReaderImpl {
 
+  // This is a work around until we update storage-api to allow 
ChronoLocalDate in
+  // predicates.
+  static Date toDate(ChronoLocalDate date) {
+    return new 
Date(date.atTime(LocalTime.MIDNIGHT).atZone(ZoneId.systemDefault())
+        .toEpochSecond() * 1000);
+  }
+
   @Test
   public void testFindColumn() throws Exception {
     Configuration conf = new Configuration();
@@ -148,6 +158,9 @@ public class TestRecordReaderImpl {
                                                   String columnName,
                                                   Object literal,
                                                   List<Object> literalList) {
+    if (literal instanceof ChronoLocalDate) {
+      literal = toDate((ChronoLocalDate) literal);
+    }
     return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
         literal, literalList);
   }
@@ -448,7 +461,7 @@ public class TestRecordReaderImpl {
     OrcProto.DateStatistics.Builder dateStats = 
OrcProto.DateStatistics.newBuilder();
     dateStats.setMinimum(min);
     dateStats.setMaximum(max);
-    return OrcProto.ColumnStatistics.newBuilder()
+    return OrcProto.ColumnStatistics.newBuilder().setNumberOfValues(1)
                .setDateStatistics(dateStats.build()).build();
   }
 
@@ -621,7 +634,7 @@ public class TestRecordReaderImpl {
 
     // Integer stats will not be converted date because of days/seconds/millis 
ambiguity
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+        PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
     try {
       evaluateInteger(createIntStats(10, 100), pred);
       fail("evaluate should throw");
@@ -664,7 +677,7 @@ public class TestRecordReaderImpl {
 
     // Double is not converted to date type because of days/seconds/millis 
ambiguity
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+        PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
     try {
       evaluateDouble(createDoubleStats(10.0, 100.0), pred);
       fail("evaluate should throw");
@@ -707,7 +720,7 @@ public class TestRecordReaderImpl {
 
     // IllegalArgumentException is thrown when converting String to Date, 
hence YES_NO
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null);
+        PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(100), null);
     assertEquals(TruthValue.YES_NO,
         evaluateInteger(createDateStats(10, 1000), pred));
 
@@ -735,7 +748,7 @@ public class TestRecordReaderImpl {
       evaluateInteger(createDateStats(10, 100), pred);
       fail("evaluate should throw");
     } catch (RecordReaderImpl.SargCastException ia) {
-      assertEquals("ORC SARGS could not convert from Date to LONG", 
ia.getMessage());
+      assertEquals("ORC SARGS could not convert from LocalDate to LONG", 
ia.getMessage());
     }
 
     // Date to Float conversion is also not possible.
@@ -745,7 +758,7 @@ public class TestRecordReaderImpl {
       evaluateInteger(createDateStats(10, 100), pred);
       fail("evaluate should throw");
     } catch (RecordReaderImpl.SargCastException ia) {
-      assertEquals("ORC SARGS could not convert from Date to FLOAT", 
ia.getMessage());
+      assertEquals("ORC SARGS could not convert from LocalDate to FLOAT", 
ia.getMessage());
     }
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
@@ -779,12 +792,12 @@ public class TestRecordReaderImpl {
         evaluateInteger(createDateStats(10, 100), pred));
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+        PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
     assertEquals(TruthValue.YES_NO,
         evaluateInteger(createDateStats(10, 100), pred));
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null);
+        PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(150), null);
     assertEquals(TruthValue.NO,
         evaluateInteger(createDateStats(10, 100), pred));
 
@@ -795,7 +808,7 @@ public class TestRecordReaderImpl {
       evaluateInteger(createDateStats(10, 100), pred);
       fail("evaluate should throw");
     } catch (RecordReaderImpl.SargCastException ia) {
-      assertEquals("ORC SARGS could not convert from Date to DECIMAL", 
ia.getMessage());
+      assertEquals("ORC SARGS could not convert from LocalDate to DECIMAL", 
ia.getMessage());
     }
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
@@ -829,7 +842,7 @@ public class TestRecordReaderImpl {
 
     // Decimal to Date not possible.
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-        PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+        PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
     try {
       evaluateInteger(createDecimalStats("10.0", "100.0"), pred);
       fail("evaluate should throw");
@@ -1704,15 +1717,15 @@ public class TestRecordReaderImpl {
   public void testDateWritableNullSafeEqualsBloomFilter() throws Exception {
     PredicateLeaf pred = createPredicateLeaf(
         PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x",
-        new DateWritable(15).get(), null);
+        LocalDate.ofEpochDay(15), null);
     BloomFilter bf = new BloomFilter(10000);
     for (int i = 20; i < 1000; i++) {
-      bf.addLong((new DateWritable(i)).getDays());
+      bf.addLong(i);
     }
     ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, 
createDateStats(10, 100));
     assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, 
bf));
 
-    bf.addLong((new DateWritable(15)).getDays());
+    bf.addLong(15);
     assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, 
pred, bf));
   }
 
@@ -1720,37 +1733,37 @@ public class TestRecordReaderImpl {
   public void testDateWritableEqualsBloomFilter() throws Exception {
     PredicateLeaf pred = createPredicateLeaf(
         PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x",
-        new DateWritable(15).get(), null);
+        LocalDate.ofEpochDay(15), null);
     BloomFilter bf = new BloomFilter(10000);
     for (int i = 20; i < 1000; i++) {
-      bf.addLong((new DateWritable(i)).getDays());
+      bf.addLong(i);
     }
     ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, 
createDateStats(10, 100));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, 
pred, bf));
 
-    bf.addLong((new DateWritable(15)).getDays());
+    bf.addLong(15);
     assertEquals(TruthValue.YES_NO_NULL, 
RecordReaderImpl.evaluatePredicate(cs, pred, bf));
   }
 
   @Test
   public void testDateWritableInBloomFilter() throws Exception {
-    List<Object> args = new ArrayList<Object>();
-    args.add(new DateWritable(15).get());
-    args.add(new DateWritable(19).get());
+    List<Object> args = new ArrayList<>();
+    args.add(toDate(LocalDate.ofEpochDay(15)));
+    args.add(toDate(LocalDate.ofEpochDay(19)));
     PredicateLeaf pred = createPredicateLeaf
         (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE,
             "x", null, args);
     BloomFilter bf = new BloomFilter(10000);
     for (int i = 20; i < 1000; i++) {
-      bf.addLong((new DateWritable(i)).getDays());
+      bf.addLong(i);
     }
     ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, 
createDateStats(10, 100));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, 
pred, bf));
 
-    bf.addLong((new DateWritable(19)).getDays());
+    bf.addLong(19);
     assertEquals(TruthValue.YES_NO_NULL, 
RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
-    bf.addLong((new DateWritable(15)).getDays());
+    bf.addLong(15);
     assertEquals(TruthValue.YES_NO_NULL, 
RecordReaderImpl.evaluatePredicate(cs, pred, bf));
   }
 

Reply via email to