This is an automated email from the ASF dual-hosted git repository. omalley pushed a commit to branch branch-1.6 in repository https://gitbox.apache.org/repos/asf/orc.git
commit 15030ef810808d62b3d21883fc76ad2beb14deb0 Author: Owen O'Malley <[email protected]> AuthorDate: Fri Aug 21 16:51:08 2020 -0700 ORC-661: Use ChronoLocalDate and day of epoch instead of java's Date for column statistics. Fixes #540 Signed-off-by: Owen O'Malley <[email protected]> --- .../java/org/apache/orc/DateColumnStatistics.java | 29 ++++++ .../org/apache/orc/impl/ColumnStatisticsImpl.java | 114 ++++++++++++--------- .../src/java/org/apache/orc/impl/DateUtils.java | 6 +- .../java/org/apache/orc/impl/RecordReaderImpl.java | 37 +++++-- .../org/apache/orc/impl/writer/TreeWriterBase.java | 7 +- .../test/org/apache/orc/TestColumnStatistics.java | 28 +++++ .../org/apache/orc/TestProlepticConversions.java | 27 +++-- .../org/apache/orc/impl/TestRecordReaderImpl.java | 59 ++++++----- 8 files changed, 209 insertions(+), 98 deletions(-) diff --git a/java/core/src/java/org/apache/orc/DateColumnStatistics.java b/java/core/src/java/org/apache/orc/DateColumnStatistics.java index 59efc16..7d810b1 100644 --- a/java/core/src/java/org/apache/orc/DateColumnStatistics.java +++ b/java/core/src/java/org/apache/orc/DateColumnStatistics.java @@ -17,21 +17,50 @@ */ package org.apache.orc; +import java.time.LocalDate; +import java.time.chrono.ChronoLocalDate; import java.util.Date; /** * Statistics for DATE columns. */ public interface DateColumnStatistics extends ColumnStatistics { + + /** + * Get the minimum value for the column. + * @return minimum value as a LocalDate + */ + ChronoLocalDate getMinimumLocalDate(); + + /** + * Get the minimum value for the column. + * @return minimum value as days since epoch (1 Jan 1970) + */ + long getMinimumDayOfEpoch(); + + /** + * Get the maximum value for the column. + * @return maximum value as a LocalDate + */ + ChronoLocalDate getMaximumLocalDate(); + + /** + * Get the maximum value for the column. + * @return maximum value as days since epoch (1 Jan 1970) + */ + long getMaximumDayOfEpoch(); + /** * Get the minimum value for the column. * @return minimum value + * @deprecated Use #getMinimumLocalDate instead */ Date getMinimum(); /** * Get the maximum value for the column. * @return maximum value + * @deprecated Use #getMaximumLocalDate instead */ Date getMaximum(); } diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java index 263c3bb..587e33b 100644 --- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java @@ -17,6 +17,10 @@ */ package org.apache.orc.impl; +import java.time.LocalDate; +import java.time.chrono.ChronoLocalDate; +import java.time.chrono.Chronology; +import java.time.chrono.IsoChronology; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -39,6 +43,8 @@ import org.apache.orc.TypeDescription; import java.sql.Date; import java.sql.Timestamp; import java.util.TimeZone; +import org.threeten.extra.chrono.HybridChronology; + public class ColumnStatisticsImpl implements ColumnStatistics { @@ -1473,16 +1479,23 @@ public class ColumnStatisticsImpl implements ColumnStatistics { private static final class DateStatisticsImpl extends ColumnStatisticsImpl implements DateColumnStatistics { - private Integer minimum = null; - private Integer maximum = null; + private int minimum = Integer.MAX_VALUE; + private int maximum = Integer.MIN_VALUE; + private final Chronology chronology; + + static Chronology getInstance(boolean proleptic) { + return proleptic ? IsoChronology.INSTANCE : HybridChronology.INSTANCE; + } - DateStatisticsImpl() { + DateStatisticsImpl(boolean convertToProleptic) { + this.chronology = getInstance(convertToProleptic); } DateStatisticsImpl(OrcProto.ColumnStatistics stats, boolean writerUsedProlepticGregorian, boolean convertToProlepticGregorian) { super(stats); + this.chronology = getInstance(convertToProlepticGregorian); OrcProto.DateStatistics dateStats = stats.getDateStatistics(); // min,max values serialized/deserialized as int (days since epoch) if (dateStats.hasMaximum()) { @@ -1498,30 +1511,26 @@ public class ColumnStatisticsImpl implements ColumnStatistics { @Override public void reset() { super.reset(); - minimum = null; - maximum = null; + minimum = Integer.MAX_VALUE; + maximum = Integer.MIN_VALUE; } @Override public void updateDate(DateWritable value) { - if (minimum == null) { + if (minimum > value.getDays()) { minimum = value.getDays(); - maximum = value.getDays(); - } else if (minimum > value.getDays()) { - minimum = value.getDays(); - } else if (maximum < value.getDays()) { + } + if (maximum < value.getDays()) { maximum = value.getDays(); } } @Override public void updateDate(int value) { - if (minimum == null) { - minimum = value; - maximum = value; - } else if (minimum > value) { + if (minimum > value) { minimum = value; - } else if (maximum < value) { + } + if (maximum < value) { maximum = value; } } @@ -1530,19 +1539,10 @@ public class ColumnStatisticsImpl implements ColumnStatistics { public void merge(ColumnStatisticsImpl other) { if (other instanceof DateStatisticsImpl) { DateStatisticsImpl dateStats = (DateStatisticsImpl) other; - if (minimum == null) { - minimum = dateStats.minimum; - maximum = dateStats.maximum; - } else if (dateStats.minimum != null) { - if (minimum > dateStats.minimum) { - minimum = dateStats.minimum; - } - if (maximum < dateStats.maximum) { - maximum = dateStats.maximum; - } - } + minimum = Math.min(minimum, dateStats.minimum); + maximum = Math.max(maximum, dateStats.maximum); } else { - if (isStatsExists() && minimum != null) { + if (isStatsExists() && count != 0) { throw new IllegalArgumentException("Incompatible merging of date column statistics"); } } @@ -1554,7 +1554,7 @@ public class ColumnStatisticsImpl implements ColumnStatistics { OrcProto.ColumnStatistics.Builder result = super.serialize(); OrcProto.DateStatistics.Builder dateStats = OrcProto.DateStatistics.newBuilder(); - if (getNumberOfValues() != 0 && minimum != null) { + if (count != 0) { dateStats.setMinimum(minimum); dateStats.setMaximum(maximum); } @@ -1562,24 +1562,41 @@ public class ColumnStatisticsImpl implements ColumnStatistics { return result; } - private transient final DateWritable minDate = new DateWritable(); - private transient final DateWritable maxDate = new DateWritable(); + @Override + public ChronoLocalDate getMinimumLocalDate() { + return count == 0 ? null : chronology.dateEpochDay(minimum); + } + + @Override + public long getMinimumDayOfEpoch() { + return minimum; + } + + @Override + public ChronoLocalDate getMaximumLocalDate() { + return count == 0 ? null : chronology.dateEpochDay(maximum); + } + + @Override + public long getMaximumDayOfEpoch() { + return maximum; + } @Override public Date getMinimum() { - if (minimum == null) { + if (count == 0) { return null; } - minDate.set(minimum); + DateWritable minDate = new DateWritable(minimum); return minDate.get(); } @Override public Date getMaximum() { - if (maximum == null) { + if (count == 0) { return null; } - maxDate.set(maximum); + DateWritable maxDate = new DateWritable(maximum); return maxDate.get(); } @@ -1588,9 +1605,9 @@ public class ColumnStatisticsImpl implements ColumnStatistics { StringBuilder buf = new StringBuilder(super.toString()); if (getNumberOfValues() != 0) { buf.append(" min: "); - buf.append(getMinimum()); + buf.append(getMinimumLocalDate()); buf.append(" max: "); - buf.append(getMaximum()); + buf.append(getMaximumLocalDate()); } return buf.toString(); } @@ -1609,16 +1626,10 @@ public class ColumnStatisticsImpl implements ColumnStatistics { DateStatisticsImpl that = (DateStatisticsImpl) o; - if (minimum != null ? !minimum.equals(that.minimum) : that.minimum != null) { - return false; - } - if (maximum != null ? !maximum.equals(that.maximum) : that.maximum != null) { - return false; - } - if (minDate != null ? !minDate.equals(that.minDate) : that.minDate != null) { + if (minimum != that.minimum) { return false; } - if (maxDate != null ? !maxDate.equals(that.maxDate) : that.maxDate != null) { + if (maximum != that.maximum) { return false; } @@ -1628,10 +1639,8 @@ public class ColumnStatisticsImpl implements ColumnStatistics { @Override public int hashCode() { int result = super.hashCode(); - result = 31 * result + (minimum != null ? minimum.hashCode() : 0); - result = 31 * result + (maximum != null ? maximum.hashCode() : 0); - result = 31 * result + (minDate != null ? minDate.hashCode() : 0); - result = 31 * result + (maxDate != null ? maxDate.hashCode() : 0); + result = 31 * result + minimum; + result = 31 * result + maximum; return result; } } @@ -1983,6 +1992,11 @@ public class ColumnStatisticsImpl implements ColumnStatistics { } public static ColumnStatisticsImpl create(TypeDescription schema) { + return create(schema, false); + } + + public static ColumnStatisticsImpl create(TypeDescription schema, + boolean convertToProleptic) { switch (schema.getCategory()) { case BOOLEAN: return new BooleanStatisticsImpl(); @@ -2008,7 +2022,7 @@ public class ColumnStatisticsImpl implements ColumnStatistics { return new DecimalStatisticsImpl(); } case DATE: - return new DateStatisticsImpl(); + return new DateStatisticsImpl(convertToProleptic); case TIMESTAMP: return new TimestampStatisticsImpl(); case TIMESTAMP_INSTANT: @@ -2022,7 +2036,7 @@ public class ColumnStatisticsImpl implements ColumnStatistics { public static ColumnStatisticsImpl deserialize(TypeDescription schema, OrcProto.ColumnStatistics stats) { - return deserialize(schema, stats, false, false); + return deserialize(schema, stats, true, true); } public static ColumnStatisticsImpl deserialize(TypeDescription schema, diff --git a/java/core/src/java/org/apache/orc/impl/DateUtils.java b/java/core/src/java/org/apache/orc/impl/DateUtils.java index 44c3c7e..8660dc3 100644 --- a/java/core/src/java/org/apache/orc/impl/DateUtils.java +++ b/java/core/src/java/org/apache/orc/impl/DateUtils.java @@ -97,12 +97,12 @@ public class DateUtils { * @return day of epoch in the hybrid Julian/Gregorian */ public static int convertDateToHybrid(int proleptic) { - int hyrbid = proleptic; + int hybrid = proleptic; if (proleptic < SWITCHOVER_DAYS) { String dateStr = PROLEPTIC_DATE_FORMAT.format(LocalDate.ofEpochDay(proleptic)); - hyrbid = (int) LocalDate.from(HYBRID_DATE_FORMAT.parse(dateStr)).toEpochDay(); + hybrid = (int) LocalDate.from(HYBRID_DATE_FORMAT.parse(dateStr)).toEpochDay(); } - return hyrbid; + return hybrid; } /** diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 5851c75..9358bf4 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -17,6 +17,13 @@ */ package org.apache.orc.impl; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.chrono.ChronoLocalDate; +import java.time.format.DateTimeFormatter; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -25,7 +32,6 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.util.TimestampUtils; -import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.io.Text; import org.apache.orc.BooleanColumnStatistics; @@ -55,10 +61,10 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.math.BigDecimal; -import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; +import java.util.Date; import java.util.List; import java.util.TimeZone; @@ -400,8 +406,8 @@ public class RecordReaderImpl implements RecordReader { stats.getMaximum() == null); } else if (index instanceof DateColumnStatistics) { DateColumnStatistics stats = (DateColumnStatistics) index; - java.util.Date min = stats.getMinimum(); - java.util.Date max = stats.getMaximum(); + ChronoLocalDate min = stats.getMinimumLocalDate(); + ChronoLocalDate max = stats.getMaximumLocalDate(); return new ValueRange<>(predicate, min, max, stats.hasNull()); } else if (index instanceof DecimalColumnStatistics) { DecimalColumnStatistics stats = (DecimalColumnStatistics) index; @@ -734,8 +740,8 @@ public class RecordReaderImpl implements RecordReader { result = TruthValue.YES_NO_NULL; } } - } else if (predObj instanceof Date) { - if (bf.testLong(DateWritable.dateToDays((Date) predObj))) { + } else if (predObj instanceof ChronoLocalDate) { + if (bf.testLong(((ChronoLocalDate) predObj).toEpochDay())) { result = TruthValue.YES_NO_NULL; } } else { @@ -783,12 +789,17 @@ public class RecordReaderImpl implements RecordReader { return Boolean.valueOf(obj.toString()); } case DATE: - if (obj instanceof Date) { + if (obj instanceof ChronoLocalDate) { return obj; + } else if (obj instanceof java.sql.Date) { + return ((java.sql.Date) obj).toLocalDate(); + } else if (obj instanceof Date) { + return LocalDateTime.ofInstant(((Date) obj).toInstant(), + ZoneOffset.UTC).toLocalDate(); } else if (obj instanceof String) { - return Date.valueOf((String) obj); + return LocalDate.parse((String) obj); } else if (obj instanceof Timestamp) { - return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L); + return ((Timestamp) obj).toLocalDateTime().toLocalDate(); } // always string, but prevent the comparison to numbers (are they days/seconds/milliseconds?) break; @@ -841,6 +852,11 @@ public class RecordReaderImpl implements RecordReader { } break; case STRING: + if (obj instanceof ChronoLocalDate) { + ChronoLocalDate date = (ChronoLocalDate) obj; + return date.format(DateTimeFormatter.ISO_LOCAL_DATE + .withChronology(date.getChronology())); + } return (obj.toString()); case TIMESTAMP: if (obj instanceof Timestamp) { @@ -857,6 +873,9 @@ public class RecordReaderImpl implements RecordReader { return TimestampUtils.decimalToTimestamp(((HiveDecimalWritable) obj).getHiveDecimal()); } else if (obj instanceof Date) { return new Timestamp(((Date) obj).getTime()); + } else if (obj instanceof ChronoLocalDate) { + return new Timestamp(((ChronoLocalDate) obj).atTime(LocalTime.MIDNIGHT) + .toInstant(ZoneOffset.UTC).getEpochSecond() * 1000L); } // float/double conversion to timestamp is interpreted as seconds whereas integer conversion // to timestamp is interpreted as milliseconds by default. The integer to timestamp casting diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java index b494203..ba41837 100644 --- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java +++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java @@ -89,9 +89,10 @@ public abstract class TreeWriterBase implements TreeWriter { isPresent = new BitFieldWriter(isPresentOutStream, 1); this.foundNulls = false; createBloomFilter = context.getBloomFilterColumns()[id]; - indexStatistics = ColumnStatisticsImpl.create(schema); - stripeColStatistics = ColumnStatisticsImpl.create(schema); - fileStatistics = ColumnStatisticsImpl.create(schema); + boolean proleptic = context.getProlepticGregorian(); + indexStatistics = ColumnStatisticsImpl.create(schema, proleptic); + stripeColStatistics = ColumnStatisticsImpl.create(schema, proleptic); + fileStatistics = ColumnStatisticsImpl.create(schema, proleptic); if (context.buildIndex()) { rowIndex = OrcProto.RowIndex.newBuilder(); rowIndexEntry = OrcProto.RowIndexEntry.newBuilder(); diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java index ff62cca..fe2700f 100644 --- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java +++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java @@ -388,8 +388,10 @@ public class TestColumnStatistics { ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema); stats1.updateDate(new DateWritable(1000)); stats1.updateDate(new DateWritable(100)); + stats1.increment(2); stats2.updateDate(new DateWritable(10)); stats2.updateDate(new DateWritable(2000)); + stats2.increment(2); stats1.merge(stats2); DateColumnStatistics typed = (DateColumnStatistics) stats1; assertEquals(new DateWritable(10).get(), typed.getMinimum()); @@ -397,12 +399,38 @@ public class TestColumnStatistics { stats1.reset(); stats1.updateDate(new DateWritable(-10)); stats1.updateDate(new DateWritable(10000)); + stats1.increment(2); stats1.merge(stats2); assertEquals(new DateWritable(-10).get(), typed.getMinimum()); assertEquals(new DateWritable(10000).get(), typed.getMaximum()); } @Test + public void testLocalDateMerge() throws Exception { + TypeDescription schema = TypeDescription.createDate(); + + ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema); + ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema); + stats1.updateDate(1000); + stats1.updateDate(100); + stats1.increment(2); + stats2.updateDate(10); + stats2.updateDate(2000); + stats2.increment(2); + stats1.merge(stats2); + DateColumnStatistics typed = (DateColumnStatistics) stats1; + assertEquals(10, typed.getMinimumDayOfEpoch()); + assertEquals(2000, typed.getMaximumDayOfEpoch()); + stats1.reset(); + stats1.updateDate(-10); + stats1.updateDate(10000); + stats1.increment(2); + stats1.merge(stats2); + assertEquals(-10, typed.getMinimumLocalDate().toEpochDay()); + assertEquals(10000, typed.getMaximumLocalDate().toEpochDay()); + } + + @Test public void testTimestampMergeUTC() throws Exception { TypeDescription schema = TypeDescription.createTimestamp(); diff --git a/java/core/src/test/org/apache/orc/TestProlepticConversions.java b/java/core/src/test/org/apache/orc/TestProlepticConversions.java index 95bcb71..b97a6e4 100644 --- a/java/core/src/test/org/apache/orc/TestProlepticConversions.java +++ b/java/core/src/test/org/apache/orc/TestProlepticConversions.java @@ -17,6 +17,9 @@ */ package org.apache.orc; +import java.time.chrono.Chronology; +import java.time.chrono.IsoChronology; +import java.time.format.DateTimeFormatter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -45,6 +48,7 @@ import java.util.GregorianCalendar; import java.util.List; import java.util.TimeZone; import java.util.concurrent.TimeUnit; +import org.threeten.extra.chrono.HybridChronology; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -126,11 +130,12 @@ public class TestProlepticConversions { t.changeCalendar(writerProlepticGregorian, false); i.changeCalendar(writerProlepticGregorian, false); GregorianCalendar cal = writerProlepticGregorian ? PROLEPTIC : HYBRID; - SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal); SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal); + Chronology writerChronology = writerProlepticGregorian + ? IsoChronology.INSTANCE : HybridChronology.INSTANCE; for(int r=0; r < batch.size; ++r) { - d.vector[r] = TimeUnit.MILLISECONDS.toDays( - dateFormat.parse(String.format("%04d-01-23", r * 2 + 1)).getTime()); + d.vector[r] = writerChronology.date(r * 2 + 1, 1, 23) + .toEpochDay(); Date val = timeFormat.parse( String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r % 24)); t.time[r] = val.getTime(); @@ -151,16 +156,18 @@ public class TestProlepticConversions { TimestampColumnVector t = (TimestampColumnVector) batch.cols[1]; TimestampColumnVector i = (TimestampColumnVector) batch.cols[2]; GregorianCalendar cal = readerProlepticGregorian ? PROLEPTIC : HYBRID; - SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal); SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal); + Chronology readerChronology = readerProlepticGregorian + ? IsoChronology.INSTANCE : HybridChronology.INSTANCE; + DateTimeFormatter dateFormat = DateTimeFormatter.ISO_LOCAL_DATE.withChronology(readerChronology); // Check the file statistics ColumnStatistics[] colStats = reader.getStatistics(); DateColumnStatistics dStats = (DateColumnStatistics) colStats[1]; TimestampColumnStatistics tStats = (TimestampColumnStatistics) colStats[2]; TimestampColumnStatistics iStats = (TimestampColumnStatistics) colStats[3]; - assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum())); - assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum())); + assertEquals("0001-01-23", dStats.getMinimumLocalDate().format(dateFormat)); + assertEquals("2047-01-23", dStats.getMaximumLocalDate().format(dateFormat)); assertEquals("0001-03-21 00:12:34", timeFormat.format(tStats.getMinimum())); assertEquals("2047-03-21 15:12:34", timeFormat.format(tStats.getMaximum())); assertEquals("0001-03-21 00:12:34", timeFormat.format(iStats.getMinimum())); @@ -173,8 +180,8 @@ public class TestProlepticConversions { dStats = (DateColumnStatistics) colStats[1]; tStats = (TimestampColumnStatistics) colStats[2]; iStats = (TimestampColumnStatistics) colStats[3]; - assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum())); - assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum())); + assertEquals("0001-01-23", dStats.getMinimumLocalDate().format(dateFormat)); + assertEquals("2047-01-23", dStats.getMaximumLocalDate().format(dateFormat)); assertEquals("0001-03-21 00:12:34", timeFormat.format(tStats.getMinimum())); assertEquals("2047-03-21 15:12:34", timeFormat.format(tStats.getMaximum())); assertEquals("0001-03-21 00:12:34", timeFormat.format(iStats.getMinimum())); @@ -190,8 +197,8 @@ public class TestProlepticConversions { for(int r=0; r < batch.size; ++r) { String expectedD = String.format("%04d-01-23", r * 2 + 1); String expectedT = String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r % 24); - assertEquals("row " + r, expectedD, dateFormat.format( - new Date(TimeUnit.DAYS.toMillis(d.vector[r])))); + assertEquals("row " + r, expectedD, readerChronology.dateEpochDay(d.vector[r]) + .format(dateFormat)); assertEquals("row " + r, expectedT, timeFormat.format(t.asScratchTimestamp(r))); assertEquals("row " + r, expectedT, timeFormat.format(i.asScratchTimestamp(r))); } diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java index dc12715..6ca24a6 100644 --- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java +++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java @@ -41,6 +41,10 @@ import java.sql.Timestamp; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.chrono.ChronoLocalDate; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -75,7 +79,6 @@ import org.apache.orc.impl.RecordReaderImpl.Location; import org.apache.orc.impl.RecordReaderImpl.SargApplier; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; -import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.orc.ColumnStatistics; @@ -89,6 +92,13 @@ import org.junit.Test; public class TestRecordReaderImpl { + // This is a work around until we update storage-api to allow ChronoLocalDate in + // predicates. + static Date toDate(ChronoLocalDate date) { + return new Date(date.atTime(LocalTime.MIDNIGHT).atZone(ZoneId.systemDefault()) + .toEpochSecond() * 1000); + } + @Test public void testFindColumn() throws Exception { Configuration conf = new Configuration(); @@ -148,6 +158,9 @@ public class TestRecordReaderImpl { String columnName, Object literal, List<Object> literalList) { + if (literal instanceof ChronoLocalDate) { + literal = toDate((ChronoLocalDate) literal); + } return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName, literal, literalList); } @@ -448,7 +461,7 @@ public class TestRecordReaderImpl { OrcProto.DateStatistics.Builder dateStats = OrcProto.DateStatistics.newBuilder(); dateStats.setMinimum(min); dateStats.setMaximum(max); - return OrcProto.ColumnStatistics.newBuilder() + return OrcProto.ColumnStatistics.newBuilder().setNumberOfValues(1) .setDateStatistics(dateStats.build()).build(); } @@ -621,7 +634,7 @@ public class TestRecordReaderImpl { // Integer stats will not be converted date because of days/seconds/millis ambiguity pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); + PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null); try { evaluateInteger(createIntStats(10, 100), pred); fail("evaluate should throw"); @@ -664,7 +677,7 @@ public class TestRecordReaderImpl { // Double is not converted to date type because of days/seconds/millis ambiguity pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); + PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null); try { evaluateDouble(createDoubleStats(10.0, 100.0), pred); fail("evaluate should throw"); @@ -707,7 +720,7 @@ public class TestRecordReaderImpl { // IllegalArgumentException is thrown when converting String to Date, hence YES_NO pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null); + PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(100), null); assertEquals(TruthValue.YES_NO, evaluateInteger(createDateStats(10, 1000), pred)); @@ -735,7 +748,7 @@ public class TestRecordReaderImpl { evaluateInteger(createDateStats(10, 100), pred); fail("evaluate should throw"); } catch (RecordReaderImpl.SargCastException ia) { - assertEquals("ORC SARGS could not convert from Date to LONG", ia.getMessage()); + assertEquals("ORC SARGS could not convert from LocalDate to LONG", ia.getMessage()); } // Date to Float conversion is also not possible. @@ -745,7 +758,7 @@ public class TestRecordReaderImpl { evaluateInteger(createDateStats(10, 100), pred); fail("evaluate should throw"); } catch (RecordReaderImpl.SargCastException ia) { - assertEquals("ORC SARGS could not convert from Date to FLOAT", ia.getMessage()); + assertEquals("ORC SARGS could not convert from LocalDate to FLOAT", ia.getMessage()); } pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -779,12 +792,12 @@ public class TestRecordReaderImpl { evaluateInteger(createDateStats(10, 100), pred)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); + PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null); assertEquals(TruthValue.YES_NO, evaluateInteger(createDateStats(10, 100), pred)); pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null); + PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(150), null); assertEquals(TruthValue.NO, evaluateInteger(createDateStats(10, 100), pred)); @@ -795,7 +808,7 @@ public class TestRecordReaderImpl { evaluateInteger(createDateStats(10, 100), pred); fail("evaluate should throw"); } catch (RecordReaderImpl.SargCastException ia) { - assertEquals("ORC SARGS could not convert from Date to DECIMAL", ia.getMessage()); + assertEquals("ORC SARGS could not convert from LocalDate to DECIMAL", ia.getMessage()); } pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -829,7 +842,7 @@ public class TestRecordReaderImpl { // Decimal to Date not possible. pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); + PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null); try { evaluateInteger(createDecimalStats("10.0", "100.0"), pred); fail("evaluate should throw"); @@ -1704,15 +1717,15 @@ public class TestRecordReaderImpl { public void testDateWritableNullSafeEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", - new DateWritable(15).get(), null); + LocalDate.ofEpochDay(15), null); BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { - bf.addLong((new DateWritable(i)).getDays()); + bf.addLong(i); } ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDateStats(10, 100)); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); - bf.addLong((new DateWritable(15)).getDays()); + bf.addLong(15); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); } @@ -1720,37 +1733,37 @@ public class TestRecordReaderImpl { public void testDateWritableEqualsBloomFilter() throws Exception { PredicateLeaf pred = createPredicateLeaf( PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x", - new DateWritable(15).get(), null); + LocalDate.ofEpochDay(15), null); BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { - bf.addLong((new DateWritable(i)).getDays()); + bf.addLong(i); } ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDateStats(10, 100)); assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); - bf.addLong((new DateWritable(15)).getDays()); + bf.addLong(15); assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); } @Test public void testDateWritableInBloomFilter() throws Exception { - List<Object> args = new ArrayList<Object>(); - args.add(new DateWritable(15).get()); - args.add(new DateWritable(19).get()); + List<Object> args = new ArrayList<>(); + args.add(toDate(LocalDate.ofEpochDay(15))); + args.add(toDate(LocalDate.ofEpochDay(19))); PredicateLeaf pred = createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE, "x", null, args); BloomFilter bf = new BloomFilter(10000); for (int i = 20; i < 1000; i++) { - bf.addLong((new DateWritable(i)).getDays()); + bf.addLong(i); } ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDateStats(10, 100)); assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); - bf.addLong((new DateWritable(19)).getDays()); + bf.addLong(19); assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); - bf.addLong((new DateWritable(15)).getDays()); + bf.addLong(15); assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf)); }
