shardulm94 commented on a change in pull request #778: ORC: Implement TestGenericData and fix reader and writer issues URL: https://github.com/apache/incubator-iceberg/pull/778#discussion_r394077109
########## File path: data/src/main/java/org/apache/iceberg/data/orc/GenericOrcReader.java ########## @@ -189,30 +224,85 @@ public Long convert(ColumnVector vector, int row) { } } - private static class BinaryConverter implements Converter<byte[]> { + private static class TimestampConverter implements Converter<LocalDateTime> { + private final ZoneOffset localZoneOffset; + + TimestampConverter() { + this.localZoneOffset = OffsetDateTime.now().getOffset(); + } + + private LocalDateTime convert(TimestampColumnVector vector, int row) { + return LocalDateTime.ofEpochSecond(vector.time[row] / 1_000, vector.nanos[row], localZoneOffset); + } + + @Override + public LocalDateTime convert(ColumnVector vector, int row) { + int rowIndex = vector.isRepeating ? 0 : row; + if (!vector.noNulls && vector.isNull[rowIndex]) { + return null; + } else { + return convert((TimestampColumnVector) vector, rowIndex); + } + } + } + + private static class FixedConverter implements Converter<byte[]> { @Override public byte[] convert(ColumnVector vector, int row) { int rowIndex = vector.isRepeating ? 0 : row; if (!vector.noNulls && vector.isNull[rowIndex]) { return null; } else { BytesColumnVector bytesVector = (BytesColumnVector) vector; - return Arrays.copyOfRange(bytesVector.vector[rowIndex], - bytesVector.start[rowIndex], + return Arrays.copyOfRange(bytesVector.vector[rowIndex], bytesVector.start[rowIndex], bytesVector.start[rowIndex] + bytesVector.length[rowIndex]); } } } + private static class BinaryConverter implements Converter<ByteBuffer> { + @Override + public ByteBuffer convert(ColumnVector vector, int row) { + int rowIndex = vector.isRepeating ? 
0 : row; + if (!vector.noNulls && vector.isNull[rowIndex]) { + return null; + } else { + BytesColumnVector bytesVector = (BytesColumnVector) vector; + ByteBuffer buf = ByteBuffer.allocate(bytesVector.length[rowIndex]); + buf.put(bytesVector.vector[rowIndex], bytesVector.start[rowIndex], bytesVector.length[rowIndex]); + buf.rewind(); + return buf; + } + } + } + + private static class UUIDConverter implements Converter<UUID> { + @Override + public UUID convert(ColumnVector vector, int row) { + int rowIndex = vector.isRepeating ? 0 : row; + if (!vector.noNulls && vector.isNull[rowIndex]) { + return null; + } else { + BytesColumnVector bytesVector = (BytesColumnVector) vector; + ByteBuffer buf = ByteBuffer.allocate(16); + buf.put(bytesVector.vector[rowIndex], bytesVector.start[rowIndex], 16); + buf.rewind(); + long mostSigBits = buf.getLong(); + long leastSigBits = buf.getLong(); + return new UUID(mostSigBits, leastSigBits); + } + } + } + private static class StringConverter implements Converter<String> { @Override public String convert(ColumnVector vector, int row) { BinaryConverter converter = new BinaryConverter(); - byte[] byteData = converter.convert(vector, row); - if (byteData == null) { + ByteBuffer byteBuffer = converter.convert(vector, row); + if (byteBuffer == null) { return null; } - return new String(byteData, StandardCharsets.UTF_8); + return new String(byteBuffer.array(), StandardCharsets.UTF_8); Review comment: I changed this to stop using ByteBuffers and instead create the String directly from the vectors returned by ORC. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org