shardulm94 commented on a change in pull request #778: ORC: Implement TestGenericData and fix reader and writer issues URL: https://github.com/apache/incubator-iceberg/pull/778#discussion_r394075083
########## File path: data/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java ########## @@ -229,27 +293,68 @@ public void addValue(int rowId, byte[] data, ColumnVector output) { output.isNull[rowId] = true; } else { output.isNull[rowId] = false; - // getBinary always makes a copy, so we don't need to worry about it - // being changed behind our back. ((BytesColumnVector) output).setRef(rowId, data, 0, data.length); } } } - static class TimestampConverter implements Converter<Long> { + static class DateConverter implements Converter<LocalDate> { @Override - public Class<Long> getJavaClass() { - return Long.class; + public Class<LocalDate> getJavaClass() { + return LocalDate.class; } - public void addValue(int rowId, Long data, ColumnVector output) { + public void addValue(int rowId, LocalDate data, ColumnVector output) { + if (data == null) { + output.noNulls = false; + output.isNull[rowId] = true; + } else { + output.isNull[rowId] = false; + ((LongColumnVector) output).vector[rowId] = ChronoUnit.DAYS.between(EPOCH_DAY, data); + } + } + } + + static class TimestampTzConverter implements Converter<OffsetDateTime> { + @Override + public Class<OffsetDateTime> getJavaClass() { + return OffsetDateTime.class; + } + + public void addValue(int rowId, OffsetDateTime data, ColumnVector output) { + if (data == null) { + output.noNulls = false; + output.isNull[rowId] = true; + } else { + output.isNull[rowId] = false; + TimestampColumnVector cv = (TimestampColumnVector) output; + long micros = ChronoUnit.MICROS.between(EPOCH, data); + cv.time[rowId] = micros / 1_000; // millis + cv.nanos[rowId] = (int) (micros % 1_000_000) * 1_000; // nanos Review comment: Yes we report the millis twice. ORC code only keeps seconds from cv.time and so handles this transparently. Since ORC expects the double reporting, I think it should be fine if we keep double reporting. @omalley Thoughts? Note that the ORC reader double reports millis even if the writer does not double report millis. Details in https://issues.apache.org/jira/browse/ORC-546 ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org