This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.6 by this push:
new e8ca08d ORC-661: Use ChronoLocalDate and day of epoch instead of
java's Date for column statistics.
e8ca08d is described below
commit e8ca08dec3fbe7002f39cf11e18532e57e329e84
Author: Owen O'Malley <[email protected]>
AuthorDate: Fri Aug 21 16:51:08 2020 -0700
ORC-661: Use ChronoLocalDate and day of epoch instead of java's Date for
column statistics.
Fixes #540
Signed-off-by: Owen O'Malley <[email protected]>
---
.../java/org/apache/orc/DateColumnStatistics.java | 29 ++++++
.../org/apache/orc/impl/ColumnStatisticsImpl.java | 114 ++++++++++++---------
.../src/java/org/apache/orc/impl/DateUtils.java | 6 +-
.../java/org/apache/orc/impl/RecordReaderImpl.java | 39 +++++--
.../org/apache/orc/impl/writer/TreeWriterBase.java | 7 +-
.../test/org/apache/orc/TestColumnStatistics.java | 28 +++++
.../org/apache/orc/TestProlepticConversions.java | 27 +++--
.../org/apache/orc/impl/TestRecordReaderImpl.java | 59 ++++++-----
8 files changed, 211 insertions(+), 98 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/DateColumnStatistics.java
b/java/core/src/java/org/apache/orc/DateColumnStatistics.java
index 59efc16..7d810b1 100644
--- a/java/core/src/java/org/apache/orc/DateColumnStatistics.java
+++ b/java/core/src/java/org/apache/orc/DateColumnStatistics.java
@@ -17,21 +17,50 @@
*/
package org.apache.orc;
+import java.time.LocalDate;
+import java.time.chrono.ChronoLocalDate;
import java.util.Date;
/**
* Statistics for DATE columns.
*/
public interface DateColumnStatistics extends ColumnStatistics {
+
+ /**
+ * Get the minimum value for the column.
+ * @return minimum value as a LocalDate
+ */
+ ChronoLocalDate getMinimumLocalDate();
+
+ /**
+ * Get the minimum value for the column.
+ * @return minimum value as days since epoch (1 Jan 1970)
+ */
+ long getMinimumDayOfEpoch();
+
+ /**
+ * Get the maximum value for the column.
+ * @return maximum value as a LocalDate
+ */
+ ChronoLocalDate getMaximumLocalDate();
+
+ /**
+ * Get the maximum value for the column.
+ * @return maximum value as days since epoch (1 Jan 1970)
+ */
+ long getMaximumDayOfEpoch();
+
/**
* Get the minimum value for the column.
* @return minimum value
+ * @deprecated Use #getMinimumLocalDate instead
*/
Date getMinimum();
/**
* Get the maximum value for the column.
* @return maximum value
+ * @deprecated Use #getMaximumLocalDate instead
*/
Date getMaximum();
}
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index 263c3bb..587e33b 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -17,6 +17,10 @@
*/
package org.apache.orc.impl;
+import java.time.LocalDate;
+import java.time.chrono.ChronoLocalDate;
+import java.time.chrono.Chronology;
+import java.time.chrono.IsoChronology;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -39,6 +43,8 @@ import org.apache.orc.TypeDescription;
import java.sql.Date;
import java.sql.Timestamp;
import java.util.TimeZone;
+import org.threeten.extra.chrono.HybridChronology;
+
public class ColumnStatisticsImpl implements ColumnStatistics {
@@ -1473,16 +1479,23 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
private static final class DateStatisticsImpl extends ColumnStatisticsImpl
implements DateColumnStatistics {
- private Integer minimum = null;
- private Integer maximum = null;
+ private int minimum = Integer.MAX_VALUE;
+ private int maximum = Integer.MIN_VALUE;
+ private final Chronology chronology;
+
+ static Chronology getInstance(boolean proleptic) {
+ return proleptic ? IsoChronology.INSTANCE : HybridChronology.INSTANCE;
+ }
- DateStatisticsImpl() {
+ DateStatisticsImpl(boolean convertToProleptic) {
+ this.chronology = getInstance(convertToProleptic);
}
DateStatisticsImpl(OrcProto.ColumnStatistics stats,
boolean writerUsedProlepticGregorian,
boolean convertToProlepticGregorian) {
super(stats);
+ this.chronology = getInstance(convertToProlepticGregorian);
OrcProto.DateStatistics dateStats = stats.getDateStatistics();
// min,max values serialized/deserialized as int (days since epoch)
if (dateStats.hasMaximum()) {
@@ -1498,30 +1511,26 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
@Override
public void reset() {
super.reset();
- minimum = null;
- maximum = null;
+ minimum = Integer.MAX_VALUE;
+ maximum = Integer.MIN_VALUE;
}
@Override
public void updateDate(DateWritable value) {
- if (minimum == null) {
+ if (minimum > value.getDays()) {
minimum = value.getDays();
- maximum = value.getDays();
- } else if (minimum > value.getDays()) {
- minimum = value.getDays();
- } else if (maximum < value.getDays()) {
+ }
+ if (maximum < value.getDays()) {
maximum = value.getDays();
}
}
@Override
public void updateDate(int value) {
- if (minimum == null) {
- minimum = value;
- maximum = value;
- } else if (minimum > value) {
+ if (minimum > value) {
minimum = value;
- } else if (maximum < value) {
+ }
+ if (maximum < value) {
maximum = value;
}
}
@@ -1530,19 +1539,10 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
public void merge(ColumnStatisticsImpl other) {
if (other instanceof DateStatisticsImpl) {
DateStatisticsImpl dateStats = (DateStatisticsImpl) other;
- if (minimum == null) {
- minimum = dateStats.minimum;
- maximum = dateStats.maximum;
- } else if (dateStats.minimum != null) {
- if (minimum > dateStats.minimum) {
- minimum = dateStats.minimum;
- }
- if (maximum < dateStats.maximum) {
- maximum = dateStats.maximum;
- }
- }
+ minimum = Math.min(minimum, dateStats.minimum);
+ maximum = Math.max(maximum, dateStats.maximum);
} else {
- if (isStatsExists() && minimum != null) {
+ if (isStatsExists() && count != 0) {
throw new IllegalArgumentException("Incompatible merging of date
column statistics");
}
}
@@ -1554,7 +1554,7 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
OrcProto.ColumnStatistics.Builder result = super.serialize();
OrcProto.DateStatistics.Builder dateStats =
OrcProto.DateStatistics.newBuilder();
- if (getNumberOfValues() != 0 && minimum != null) {
+ if (count != 0) {
dateStats.setMinimum(minimum);
dateStats.setMaximum(maximum);
}
@@ -1562,24 +1562,41 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
return result;
}
- private transient final DateWritable minDate = new DateWritable();
- private transient final DateWritable maxDate = new DateWritable();
+ @Override
+ public ChronoLocalDate getMinimumLocalDate() {
+ return count == 0 ? null : chronology.dateEpochDay(minimum);
+ }
+
+ @Override
+ public long getMinimumDayOfEpoch() {
+ return minimum;
+ }
+
+ @Override
+ public ChronoLocalDate getMaximumLocalDate() {
+ return count == 0 ? null : chronology.dateEpochDay(maximum);
+ }
+
+ @Override
+ public long getMaximumDayOfEpoch() {
+ return maximum;
+ }
@Override
public Date getMinimum() {
- if (minimum == null) {
+ if (count == 0) {
return null;
}
- minDate.set(minimum);
+ DateWritable minDate = new DateWritable(minimum);
return minDate.get();
}
@Override
public Date getMaximum() {
- if (maximum == null) {
+ if (count == 0) {
return null;
}
- maxDate.set(maximum);
+ DateWritable maxDate = new DateWritable(maximum);
return maxDate.get();
}
@@ -1588,9 +1605,9 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
StringBuilder buf = new StringBuilder(super.toString());
if (getNumberOfValues() != 0) {
buf.append(" min: ");
- buf.append(getMinimum());
+ buf.append(getMinimumLocalDate());
buf.append(" max: ");
- buf.append(getMaximum());
+ buf.append(getMaximumLocalDate());
}
return buf.toString();
}
@@ -1609,16 +1626,10 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
DateStatisticsImpl that = (DateStatisticsImpl) o;
- if (minimum != null ? !minimum.equals(that.minimum) : that.minimum !=
null) {
- return false;
- }
- if (maximum != null ? !maximum.equals(that.maximum) : that.maximum !=
null) {
- return false;
- }
- if (minDate != null ? !minDate.equals(that.minDate) : that.minDate !=
null) {
+ if (minimum != that.minimum) {
return false;
}
- if (maxDate != null ? !maxDate.equals(that.maxDate) : that.maxDate !=
null) {
+ if (maximum != that.maximum) {
return false;
}
@@ -1628,10 +1639,8 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
@Override
public int hashCode() {
int result = super.hashCode();
- result = 31 * result + (minimum != null ? minimum.hashCode() : 0);
- result = 31 * result + (maximum != null ? maximum.hashCode() : 0);
- result = 31 * result + (minDate != null ? minDate.hashCode() : 0);
- result = 31 * result + (maxDate != null ? maxDate.hashCode() : 0);
+ result = 31 * result + minimum;
+ result = 31 * result + maximum;
return result;
}
}
@@ -1983,6 +1992,11 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
}
public static ColumnStatisticsImpl create(TypeDescription schema) {
+ return create(schema, false);
+ }
+
+ public static ColumnStatisticsImpl create(TypeDescription schema,
+ boolean convertToProleptic) {
switch (schema.getCategory()) {
case BOOLEAN:
return new BooleanStatisticsImpl();
@@ -2008,7 +2022,7 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
return new DecimalStatisticsImpl();
}
case DATE:
- return new DateStatisticsImpl();
+ return new DateStatisticsImpl(convertToProleptic);
case TIMESTAMP:
return new TimestampStatisticsImpl();
case TIMESTAMP_INSTANT:
@@ -2022,7 +2036,7 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
public static ColumnStatisticsImpl deserialize(TypeDescription schema,
OrcProto.ColumnStatistics
stats) {
- return deserialize(schema, stats, false, false);
+ return deserialize(schema, stats, true, true);
}
public static ColumnStatisticsImpl deserialize(TypeDescription schema,
diff --git a/java/core/src/java/org/apache/orc/impl/DateUtils.java
b/java/core/src/java/org/apache/orc/impl/DateUtils.java
index 44c3c7e..8660dc3 100644
--- a/java/core/src/java/org/apache/orc/impl/DateUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/DateUtils.java
@@ -97,12 +97,12 @@ public class DateUtils {
* @return day of epoch in the hybrid Julian/Gregorian
*/
public static int convertDateToHybrid(int proleptic) {
- int hyrbid = proleptic;
+ int hybrid = proleptic;
if (proleptic < SWITCHOVER_DAYS) {
String dateStr =
PROLEPTIC_DATE_FORMAT.format(LocalDate.ofEpochDay(proleptic));
- hyrbid = (int)
LocalDate.from(HYBRID_DATE_FORMAT.parse(dateStr)).toEpochDay();
+ hybrid = (int)
LocalDate.from(HYBRID_DATE_FORMAT.parse(dateStr)).toEpochDay();
}
- return hyrbid;
+ return hybrid;
}
/**
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 5851c75..4dad92d 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -17,6 +17,13 @@
*/
package org.apache.orc.impl;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.chrono.ChronoLocalDate;
+import java.time.format.DateTimeFormatter;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -25,7 +32,6 @@ import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
import org.apache.hadoop.hive.ql.util.TimestampUtils;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.Text;
import org.apache.orc.BooleanColumnStatistics;
@@ -48,6 +54,8 @@ import org.apache.orc.TimestampColumnStatistics;
import org.apache.orc.TypeDescription;
import org.apache.orc.impl.reader.ReaderEncryption;
import org.apache.orc.impl.reader.StripePlanner;
+import org.apache.orc.impl.reader.tree.BatchReader;
+import org.apache.orc.impl.writer.DateTreeWriter;
import org.apache.orc.util.BloomFilter;
import org.apache.orc.util.BloomFilterIO;
import org.slf4j.Logger;
@@ -55,10 +63,10 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.math.BigDecimal;
-import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Date;
import java.util.List;
import java.util.TimeZone;
@@ -400,8 +408,8 @@ public class RecordReaderImpl implements RecordReader {
stats.getMaximum() == null);
} else if (index instanceof DateColumnStatistics) {
DateColumnStatistics stats = (DateColumnStatistics) index;
- java.util.Date min = stats.getMinimum();
- java.util.Date max = stats.getMaximum();
+ ChronoLocalDate min = stats.getMinimumLocalDate();
+ ChronoLocalDate max = stats.getMaximumLocalDate();
return new ValueRange<>(predicate, min, max, stats.hasNull());
} else if (index instanceof DecimalColumnStatistics) {
DecimalColumnStatistics stats = (DecimalColumnStatistics) index;
@@ -734,8 +742,8 @@ public class RecordReaderImpl implements RecordReader {
result = TruthValue.YES_NO_NULL;
}
}
- } else if (predObj instanceof Date) {
- if (bf.testLong(DateWritable.dateToDays((Date) predObj))) {
+ } else if (predObj instanceof ChronoLocalDate) {
+ if (bf.testLong(((ChronoLocalDate) predObj).toEpochDay())) {
result = TruthValue.YES_NO_NULL;
}
} else {
@@ -783,12 +791,17 @@ public class RecordReaderImpl implements RecordReader {
return Boolean.valueOf(obj.toString());
}
case DATE:
- if (obj instanceof Date) {
+ if (obj instanceof ChronoLocalDate) {
return obj;
+ } else if (obj instanceof java.sql.Date) {
+ return ((java.sql.Date) obj).toLocalDate();
+ } else if (obj instanceof Date) {
+ return LocalDateTime.ofInstant(((Date) obj).toInstant(),
+ ZoneOffset.UTC).toLocalDate();
} else if (obj instanceof String) {
- return Date.valueOf((String) obj);
+ return LocalDate.parse((String) obj);
} else if (obj instanceof Timestamp) {
- return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L);
+ return ((Timestamp) obj).toLocalDateTime().toLocalDate();
}
// always string, but prevent the comparison to numbers (are they days/seconds/milliseconds?)
break;
@@ -841,6 +854,11 @@ public class RecordReaderImpl implements RecordReader {
}
break;
case STRING:
+ if (obj instanceof ChronoLocalDate) {
+ ChronoLocalDate date = (ChronoLocalDate) obj;
+ return date.format(DateTimeFormatter.ISO_LOCAL_DATE
+ .withChronology(date.getChronology()));
+ }
return (obj.toString());
case TIMESTAMP:
if (obj instanceof Timestamp) {
@@ -857,6 +875,9 @@ public class RecordReaderImpl implements RecordReader {
return TimestampUtils.decimalToTimestamp(((HiveDecimalWritable)
obj).getHiveDecimal());
} else if (obj instanceof Date) {
return new Timestamp(((Date) obj).getTime());
+ } else if (obj instanceof ChronoLocalDate) {
+ return new Timestamp(((ChronoLocalDate)
obj).atTime(LocalTime.MIDNIGHT)
+ .toInstant(ZoneOffset.UTC).getEpochSecond() * 1000L);
}
// float/double conversion to timestamp is interpreted as seconds whereas integer conversion
// to timestamp is interpreted as milliseconds by default. The integer to timestamp casting
diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
index b494203..ba41837 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
@@ -89,9 +89,10 @@ public abstract class TreeWriterBase implements TreeWriter {
isPresent = new BitFieldWriter(isPresentOutStream, 1);
this.foundNulls = false;
createBloomFilter = context.getBloomFilterColumns()[id];
- indexStatistics = ColumnStatisticsImpl.create(schema);
- stripeColStatistics = ColumnStatisticsImpl.create(schema);
- fileStatistics = ColumnStatisticsImpl.create(schema);
+ boolean proleptic = context.getProlepticGregorian();
+ indexStatistics = ColumnStatisticsImpl.create(schema, proleptic);
+ stripeColStatistics = ColumnStatisticsImpl.create(schema, proleptic);
+ fileStatistics = ColumnStatisticsImpl.create(schema, proleptic);
if (context.buildIndex()) {
rowIndex = OrcProto.RowIndex.newBuilder();
rowIndexEntry = OrcProto.RowIndexEntry.newBuilder();
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index ff62cca..fe2700f 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -388,8 +388,10 @@ public class TestColumnStatistics {
ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.updateDate(new DateWritable(1000));
stats1.updateDate(new DateWritable(100));
+ stats1.increment(2);
stats2.updateDate(new DateWritable(10));
stats2.updateDate(new DateWritable(2000));
+ stats2.increment(2);
stats1.merge(stats2);
DateColumnStatistics typed = (DateColumnStatistics) stats1;
assertEquals(new DateWritable(10).get(), typed.getMinimum());
@@ -397,12 +399,38 @@ public class TestColumnStatistics {
stats1.reset();
stats1.updateDate(new DateWritable(-10));
stats1.updateDate(new DateWritable(10000));
+ stats1.increment(2);
stats1.merge(stats2);
assertEquals(new DateWritable(-10).get(), typed.getMinimum());
assertEquals(new DateWritable(10000).get(), typed.getMaximum());
}
@Test
+ public void testLocalDateMerge() throws Exception {
+ TypeDescription schema = TypeDescription.createDate();
+
+ ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
+ ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
+ stats1.updateDate(1000);
+ stats1.updateDate(100);
+ stats1.increment(2);
+ stats2.updateDate(10);
+ stats2.updateDate(2000);
+ stats2.increment(2);
+ stats1.merge(stats2);
+ DateColumnStatistics typed = (DateColumnStatistics) stats1;
+ assertEquals(10, typed.getMinimumDayOfEpoch());
+ assertEquals(2000, typed.getMaximumDayOfEpoch());
+ stats1.reset();
+ stats1.updateDate(-10);
+ stats1.updateDate(10000);
+ stats1.increment(2);
+ stats1.merge(stats2);
+ assertEquals(-10, typed.getMinimumLocalDate().toEpochDay());
+ assertEquals(10000, typed.getMaximumLocalDate().toEpochDay());
+ }
+
+ @Test
public void testTimestampMergeUTC() throws Exception {
TypeDescription schema = TypeDescription.createTimestamp();
diff --git a/java/core/src/test/org/apache/orc/TestProlepticConversions.java
b/java/core/src/test/org/apache/orc/TestProlepticConversions.java
index 95bcb71..b97a6e4 100644
--- a/java/core/src/test/org/apache/orc/TestProlepticConversions.java
+++ b/java/core/src/test/org/apache/orc/TestProlepticConversions.java
@@ -17,6 +17,9 @@
*/
package org.apache.orc;
+import java.time.chrono.Chronology;
+import java.time.chrono.IsoChronology;
+import java.time.format.DateTimeFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -45,6 +48,7 @@ import java.util.GregorianCalendar;
import java.util.List;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
+import org.threeten.extra.chrono.HybridChronology;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -126,11 +130,12 @@ public class TestProlepticConversions {
t.changeCalendar(writerProlepticGregorian, false);
i.changeCalendar(writerProlepticGregorian, false);
GregorianCalendar cal = writerProlepticGregorian ? PROLEPTIC : HYBRID;
- SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal);
SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal);
+ Chronology writerChronology = writerProlepticGregorian
+ ? IsoChronology.INSTANCE : HybridChronology.INSTANCE;
for(int r=0; r < batch.size; ++r) {
- d.vector[r] = TimeUnit.MILLISECONDS.toDays(
- dateFormat.parse(String.format("%04d-01-23", r * 2 +
1)).getTime());
+ d.vector[r] = writerChronology.date(r * 2 + 1, 1, 23)
+ .toEpochDay();
Date val = timeFormat.parse(
String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r % 24));
t.time[r] = val.getTime();
@@ -151,16 +156,18 @@ public class TestProlepticConversions {
TimestampColumnVector t = (TimestampColumnVector) batch.cols[1];
TimestampColumnVector i = (TimestampColumnVector) batch.cols[2];
GregorianCalendar cal = readerProlepticGregorian ? PROLEPTIC : HYBRID;
- SimpleDateFormat dateFormat = createParser("yyyy-MM-dd", cal);
SimpleDateFormat timeFormat = createParser("yyyy-MM-dd HH:mm:ss", cal);
+ Chronology readerChronology = readerProlepticGregorian
+ ? IsoChronology.INSTANCE : HybridChronology.INSTANCE;
+ DateTimeFormatter dateFormat =
DateTimeFormatter.ISO_LOCAL_DATE.withChronology(readerChronology);
// Check the file statistics
ColumnStatistics[] colStats = reader.getStatistics();
DateColumnStatistics dStats = (DateColumnStatistics) colStats[1];
TimestampColumnStatistics tStats = (TimestampColumnStatistics)
colStats[2];
TimestampColumnStatistics iStats = (TimestampColumnStatistics)
colStats[3];
- assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum()));
- assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum()));
+ assertEquals("0001-01-23",
dStats.getMinimumLocalDate().format(dateFormat));
+ assertEquals("2047-01-23",
dStats.getMaximumLocalDate().format(dateFormat));
assertEquals("0001-03-21 00:12:34",
timeFormat.format(tStats.getMinimum()));
assertEquals("2047-03-21 15:12:34",
timeFormat.format(tStats.getMaximum()));
assertEquals("0001-03-21 00:12:34",
timeFormat.format(iStats.getMinimum()));
@@ -173,8 +180,8 @@ public class TestProlepticConversions {
dStats = (DateColumnStatistics) colStats[1];
tStats = (TimestampColumnStatistics) colStats[2];
iStats = (TimestampColumnStatistics) colStats[3];
- assertEquals("0001-01-23", dateFormat.format(dStats.getMinimum()));
- assertEquals("2047-01-23", dateFormat.format(dStats.getMaximum()));
+ assertEquals("0001-01-23",
dStats.getMinimumLocalDate().format(dateFormat));
+ assertEquals("2047-01-23",
dStats.getMaximumLocalDate().format(dateFormat));
assertEquals("0001-03-21 00:12:34",
timeFormat.format(tStats.getMinimum()));
assertEquals("2047-03-21 15:12:34",
timeFormat.format(tStats.getMaximum()));
assertEquals("0001-03-21 00:12:34",
timeFormat.format(iStats.getMinimum()));
@@ -190,8 +197,8 @@ public class TestProlepticConversions {
for(int r=0; r < batch.size; ++r) {
String expectedD = String.format("%04d-01-23", r * 2 + 1);
String expectedT = String.format("%04d-03-21 %02d:12:34", 2 * r + 1, r
% 24);
- assertEquals("row " + r, expectedD, dateFormat.format(
- new Date(TimeUnit.DAYS.toMillis(d.vector[r]))));
+ assertEquals("row " + r, expectedD,
readerChronology.dateEpochDay(d.vector[r])
+ .format(dateFormat));
assertEquals("row " + r, expectedT,
timeFormat.format(t.asScratchTimestamp(r)));
assertEquals("row " + r, expectedT,
timeFormat.format(i.asScratchTimestamp(r)));
}
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index dc12715..6ca24a6 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -41,6 +41,10 @@ import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.chrono.ChronoLocalDate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -75,7 +79,6 @@ import org.apache.orc.impl.RecordReaderImpl.Location;
import org.apache.orc.impl.RecordReaderImpl.SargApplier;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.orc.ColumnStatistics;
@@ -89,6 +92,13 @@ import org.junit.Test;
public class TestRecordReaderImpl {
+ // This is a work around until we update storage-api to allow ChronoLocalDate in
+ // predicates.
+ static Date toDate(ChronoLocalDate date) {
+ return new
Date(date.atTime(LocalTime.MIDNIGHT).atZone(ZoneId.systemDefault())
+ .toEpochSecond() * 1000);
+ }
+
@Test
public void testFindColumn() throws Exception {
Configuration conf = new Configuration();
@@ -148,6 +158,9 @@ public class TestRecordReaderImpl {
String columnName,
Object literal,
List<Object> literalList) {
+ if (literal instanceof ChronoLocalDate) {
+ literal = toDate((ChronoLocalDate) literal);
+ }
return new SearchArgumentImpl.PredicateLeafImpl(operator, type, columnName,
literal, literalList);
}
@@ -448,7 +461,7 @@ public class TestRecordReaderImpl {
OrcProto.DateStatistics.Builder dateStats =
OrcProto.DateStatistics.newBuilder();
dateStats.setMinimum(min);
dateStats.setMaximum(max);
- return OrcProto.ColumnStatistics.newBuilder()
+ return OrcProto.ColumnStatistics.newBuilder().setNumberOfValues(1)
.setDateStatistics(dateStats.build()).build();
}
@@ -621,7 +634,7 @@ public class TestRecordReaderImpl {
// Integer stats will not be converted date because of days/seconds/millis ambiguity
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+ PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
try {
evaluateInteger(createIntStats(10, 100), pred);
fail("evaluate should throw");
@@ -664,7 +677,7 @@ public class TestRecordReaderImpl {
// Double is not converted to date type because of days/seconds/millis ambiguity
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+ PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
try {
evaluateDouble(createDoubleStats(10.0, 100.0), pred);
fail("evaluate should throw");
@@ -707,7 +720,7 @@ public class TestRecordReaderImpl {
// IllegalArgumentException is thrown when converting String to Date, hence YES_NO
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null);
+ PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(100), null);
assertEquals(TruthValue.YES_NO,
evaluateInteger(createDateStats(10, 1000), pred));
@@ -735,7 +748,7 @@ public class TestRecordReaderImpl {
evaluateInteger(createDateStats(10, 100), pred);
fail("evaluate should throw");
} catch (RecordReaderImpl.SargCastException ia) {
- assertEquals("ORC SARGS could not convert from Date to LONG",
ia.getMessage());
+ assertEquals("ORC SARGS could not convert from LocalDate to LONG",
ia.getMessage());
}
// Date to Float conversion is also not possible.
@@ -745,7 +758,7 @@ public class TestRecordReaderImpl {
evaluateInteger(createDateStats(10, 100), pred);
fail("evaluate should throw");
} catch (RecordReaderImpl.SargCastException ia) {
- assertEquals("ORC SARGS could not convert from Date to FLOAT",
ia.getMessage());
+ assertEquals("ORC SARGS could not convert from LocalDate to FLOAT",
ia.getMessage());
}
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
@@ -779,12 +792,12 @@ public class TestRecordReaderImpl {
evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+ PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
assertEquals(TruthValue.YES_NO,
evaluateInteger(createDateStats(10, 100), pred));
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null);
+ PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(150), null);
assertEquals(TruthValue.NO,
evaluateInteger(createDateStats(10, 100), pred));
@@ -795,7 +808,7 @@ public class TestRecordReaderImpl {
evaluateInteger(createDateStats(10, 100), pred);
fail("evaluate should throw");
} catch (RecordReaderImpl.SargCastException ia) {
- assertEquals("ORC SARGS could not convert from Date to DECIMAL",
ia.getMessage());
+ assertEquals("ORC SARGS could not convert from LocalDate to DECIMAL",
ia.getMessage());
}
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
@@ -829,7 +842,7 @@ public class TestRecordReaderImpl {
// Decimal to Date not possible.
pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
- PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
+ PredicateLeaf.Type.DATE, "x", LocalDate.ofEpochDay(15), null);
try {
evaluateInteger(createDecimalStats("10.0", "100.0"), pred);
fail("evaluate should throw");
@@ -1704,15 +1717,15 @@ public class TestRecordReaderImpl {
public void testDateWritableNullSafeEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x",
- new DateWritable(15).get(), null);
+ LocalDate.ofEpochDay(15), null);
BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
- bf.addLong((new DateWritable(i)).getDays());
+ bf.addLong(i);
}
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null,
createDateStats(10, 100));
assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred,
bf));
- bf.addLong((new DateWritable(15)).getDays());
+ bf.addLong(15);
assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicate(cs,
pred, bf));
}
@@ -1720,37 +1733,37 @@ public class TestRecordReaderImpl {
public void testDateWritableEqualsBloomFilter() throws Exception {
PredicateLeaf pred = createPredicateLeaf(
PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x",
- new DateWritable(15).get(), null);
+ LocalDate.ofEpochDay(15), null);
BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
- bf.addLong((new DateWritable(i)).getDays());
+ bf.addLong(i);
}
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null,
createDateStats(10, 100));
assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs,
pred, bf));
- bf.addLong((new DateWritable(15)).getDays());
+ bf.addLong(15);
assertEquals(TruthValue.YES_NO_NULL,
RecordReaderImpl.evaluatePredicate(cs, pred, bf));
}
@Test
public void testDateWritableInBloomFilter() throws Exception {
- List<Object> args = new ArrayList<Object>();
- args.add(new DateWritable(15).get());
- args.add(new DateWritable(19).get());
+ List<Object> args = new ArrayList<>();
+ args.add(toDate(LocalDate.ofEpochDay(15)));
+ args.add(toDate(LocalDate.ofEpochDay(19)));
PredicateLeaf pred = createPredicateLeaf
(PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE,
"x", null, args);
BloomFilter bf = new BloomFilter(10000);
for (int i = 20; i < 1000; i++) {
- bf.addLong((new DateWritable(i)).getDays());
+ bf.addLong(i);
}
ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null,
createDateStats(10, 100));
assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs,
pred, bf));
- bf.addLong((new DateWritable(19)).getDays());
+ bf.addLong(19);
assertEquals(TruthValue.YES_NO_NULL,
RecordReaderImpl.evaluatePredicate(cs, pred, bf));
- bf.addLong((new DateWritable(15)).getDays());
+ bf.addLong(15);
assertEquals(TruthValue.YES_NO_NULL,
RecordReaderImpl.evaluatePredicate(cs, pred, bf));
}