Repository: tajo Updated Branches: refs/heads/branch-0.11.0 87515a8a0 -> fb6e1b12d
TAJO-1918: Writing text type in Parquet should handle text bytes. Signed-off-by: Jinho Kim <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fb6e1b12 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fb6e1b12 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fb6e1b12 Branch: refs/heads/branch-0.11.0 Commit: fb6e1b12df4a9e7b629dc91d3f9b47b18696774a Parents: 87515a8 Author: Jongyoung Park <[email protected]> Authored: Thu Oct 8 16:38:11 2015 +0900 Committer: Jinho Kim <[email protected]> Committed: Thu Oct 8 16:38:11 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 ++ .../java/org/apache/tajo/datum/DateDatum.java | 5 ++ .../org/apache/tajo/datum/TimestampDatum.java | 5 ++ .../tajo/storage/parquet/TajoWriteSupport.java | 6 +-- .../org/apache/tajo/storage/TestStorages.java | 48 ++++++++++++++++++-- 5 files changed, 60 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 07f8b13..91b3efa 100644 --- a/CHANGES +++ b/CHANGES @@ -284,6 +284,9 @@ Release 0.11.0 - unreleased (Contributed by navis, Committed by hyunsik) BUG FIXES + + TAJO-1918: Writing text type in Parquet should handle text bytes. + (Contributed by Jongyoung Park. Committed by jinho) TAJO-1913: Timezone does not affect the constant folding. (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java index f69aa44..ac84e25 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java @@ -54,6 +54,11 @@ public class DateDatum extends Datum { return tm; } + @Override + public byte[] asTextBytes() { + return asChars().getBytes(TextDatum.DEFAULT_CHARSET); + } + public int getCenturyOfEra() { return asTimeMeta().getCenturyOfEra(); } http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java index aaf7beb..5b4c152 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java @@ -170,6 +170,11 @@ public class TimestampDatum extends Datum { } @Override + public byte[] asTextBytes() { + return asChars().getBytes(TextDatum.DEFAULT_CHARSET); + } + + @Override public Datum equalsTo(Datum datum) { if (datum.type() == TajoDataTypes.Type.TIME) { return timestamp == datum.asInt8() ? BooleanDatum.TRUE : BooleanDatum.FALSE; http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java index de2a1e3..7469d1e 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java @@ -101,7 +101,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> { Type fieldType = fields.get(index); if (!tuple.isBlankOrNull(tajoIndex)) { recordConsumer.startField(fieldType.getName(), index); - writeValue(fieldType, column, tuple, tajoIndex); + writeValue(column, tuple, tajoIndex); recordConsumer.endField(fieldType.getName(), index); } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) { throw new RuntimeException("Null-value for required field: " + @@ -111,7 +111,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> { } } - private void writeValue(Type fieldType, Column column, Tuple tuple, int index) { + private void writeValue(Column column, Tuple tuple, int index) { switch (column.getDataType().getType()) { case BOOLEAN: recordConsumer.addBoolean(tuple.getBool(index)); @@ -138,7 +138,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> { recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index))); break; case TEXT: - recordConsumer.addBinary(Binary.fromByteArray(tuple.getBytes(index))); + recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index))); break; case PROTOBUF: case BLOB: http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java index 278de45..dafaf05 100644 --- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java +++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java @@ -706,7 +706,6 @@ public class TestStorages { appender.init(); QueryId queryid = new QueryId("12345", 5); - ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); VTuple tuple = new VTuple(new Datum[] { DatumFactory.createBool(true), @@ -721,7 +720,7 @@ public class TestStorages { DatumFactory.createBlob("hyunsik babo".getBytes()), DatumFactory.createInet4("192.168.0.1"), NullDatum.get(), - factory.createDatum(queryid.getProto()) + ProtobufDatumFactory.createDatum(queryid.getProto()) }); appender.addTuple(tuple); appender.flush(); @@ -779,7 +778,6 @@ public class TestStorages { appender.init(); QueryId queryid = new QueryId("12345", 5); - ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); VTuple tuple = new VTuple(13); tuple.put(new Datum[] { @@ -795,7 +793,7 @@ public class TestStorages { DatumFactory.createBlob("hyunsik babo".getBytes()), DatumFactory.createInet4("192.168.0.1"), NullDatum.get(), - factory.createDatum(queryid.getProto()) + ProtobufDatumFactory.createDatum(queryid.getProto()) }); appender.addTuple(tuple); appender.flush(); @@ -1114,4 +1112,46 @@ public class TestStorages { assertTrue(ok); } + + @Test + public void testDateTextHandling() throws Exception { + if (dataFormat.equalsIgnoreCase(BuiltinStorages.AVRO) || internalType) { + return; + } + + Schema schema = new Schema(); + schema.addColumn("col1", Type.TEXT); + + KeyValueSet options = new KeyValueSet(); + TableMeta meta = CatalogUtil.newTableMeta(dataFormat, options); + + FileTablespace sm = TablespaceManager.getLocalFs(); + Path tablePath = new Path(testDir, "testTextHandling.data"); + + Appender appender = sm.getAppender(meta, schema, tablePath); + + appender.init(); + + VTuple tuple = new VTuple(1); + tuple.put(0, DatumFactory.createDate(1994,7,30)); + + appender.addTuple(tuple); + appender.flush(); + appender.close(); + + FileStatus status = fs.getFileStatus(tablePath); + FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); + Scanner scanner = sm.getScanner(meta, schema, fragment, null); + scanner.init(); + + Tuple retrieved; + while ((retrieved = scanner.next()) != null) { + assertEquals(tuple.get(0).asChars(), retrieved.asDatum(0).asChars()); + } + scanner.close(); + + if (internalType){ + OldStorageManager.clearCache(); + } + } } \ No newline at end of file
