Repository: tajo
Updated Branches:
  refs/heads/branch-0.11.0 87515a8a0 -> fb6e1b12d


TAJO-1918: Writing text type in Parquet should handle text bytes.

Signed-off-by: Jinho Kim <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fb6e1b12
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fb6e1b12
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fb6e1b12

Branch: refs/heads/branch-0.11.0
Commit: fb6e1b12df4a9e7b629dc91d3f9b47b18696774a
Parents: 87515a8
Author: Jongyoung Park <[email protected]>
Authored: Thu Oct 8 16:38:11 2015 +0900
Committer: Jinho Kim <[email protected]>
Committed: Thu Oct 8 16:38:11 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 ++
 .../java/org/apache/tajo/datum/DateDatum.java   |  5 ++
 .../org/apache/tajo/datum/TimestampDatum.java   |  5 ++
 .../tajo/storage/parquet/TajoWriteSupport.java  |  6 +--
 .../org/apache/tajo/storage/TestStorages.java   | 48 ++++++++++++++++++--
 5 files changed, 60 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 07f8b13..91b3efa 100644
--- a/CHANGES
+++ b/CHANGES
@@ -284,6 +284,9 @@ Release 0.11.0 - unreleased
     (Contributed by navis, Committed by hyunsik)
 
   BUG FIXES
+
+    TAJO-1918: Writing text type in Parquet should handle text bytes.
+    (Contributed by Jongyoung Park. Committed by jinho)
   
     TAJO-1913: Timezone does not affect the constant folding. (hyunsik)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
index f69aa44..ac84e25 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
@@ -54,6 +54,11 @@ public class DateDatum extends Datum {
     return tm;
   }
 
+  @Override
+  public byte[] asTextBytes() {
+    return asChars().getBytes(TextDatum.DEFAULT_CHARSET);
+  }
+
   public int getCenturyOfEra() {
     return asTimeMeta().getCenturyOfEra();
   }

http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
index aaf7beb..5b4c152 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
@@ -170,6 +170,11 @@ public class TimestampDatum extends Datum {
   }
 
   @Override
+  public byte[] asTextBytes() {
+    return asChars().getBytes(TextDatum.DEFAULT_CHARSET);
+  }
+
+  @Override
   public Datum equalsTo(Datum datum) {
     if (datum.type() == TajoDataTypes.Type.TIME) {
       return timestamp == datum.asInt8() ? BooleanDatum.TRUE : BooleanDatum.FALSE;

http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
index de2a1e3..7469d1e 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
@@ -101,7 +101,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
       Type fieldType = fields.get(index);
       if (!tuple.isBlankOrNull(tajoIndex)) {
         recordConsumer.startField(fieldType.getName(), index);
-        writeValue(fieldType, column, tuple, tajoIndex);
+        writeValue(column, tuple, tajoIndex);
         recordConsumer.endField(fieldType.getName(), index);
       } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
         throw new RuntimeException("Null-value for required field: " +
@@ -111,7 +111,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
     }
   }
 
-  private void writeValue(Type fieldType, Column column, Tuple tuple, int index) {
+  private void writeValue(Column column, Tuple tuple, int index) {
     switch (column.getDataType().getType()) {
       case BOOLEAN:
         recordConsumer.addBoolean(tuple.getBool(index));
@@ -138,7 +138,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
         recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index)));
         break;
       case TEXT:
-        recordConsumer.addBinary(Binary.fromByteArray(tuple.getBytes(index)));
+        recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index)));
         break;
       case PROTOBUF:
       case BLOB:

http://git-wip-us.apache.org/repos/asf/tajo/blob/fb6e1b12/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
index 278de45..dafaf05 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -706,7 +706,6 @@ public class TestStorages {
     appender.init();
 
     QueryId queryid = new QueryId("12345", 5);
-    ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
 
     VTuple tuple = new VTuple(new Datum[] {
         DatumFactory.createBool(true),
@@ -721,7 +720,7 @@ public class TestStorages {
         DatumFactory.createBlob("hyunsik babo".getBytes()),
         DatumFactory.createInet4("192.168.0.1"),
         NullDatum.get(),
-        factory.createDatum(queryid.getProto())
+        ProtobufDatumFactory.createDatum(queryid.getProto())
     });
     appender.addTuple(tuple);
     appender.flush();
@@ -779,7 +778,6 @@ public class TestStorages {
     appender.init();
 
     QueryId queryid = new QueryId("12345", 5);
-    ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
 
     VTuple tuple = new VTuple(13);
     tuple.put(new Datum[] {
@@ -795,7 +793,7 @@ public class TestStorages {
         DatumFactory.createBlob("hyunsik babo".getBytes()),
         DatumFactory.createInet4("192.168.0.1"),
         NullDatum.get(),
-        factory.createDatum(queryid.getProto())
+        ProtobufDatumFactory.createDatum(queryid.getProto())
     });
     appender.addTuple(tuple);
     appender.flush();
@@ -1114,4 +1112,46 @@ public class TestStorages {
 
     assertTrue(ok);
   }
+
+  @Test
+  public void testDateTextHandling() throws Exception {
+    if (dataFormat.equalsIgnoreCase(BuiltinStorages.AVRO) || internalType) {
+      return;
+    }
+
+    Schema schema = new Schema();
+    schema.addColumn("col1", Type.TEXT);
+
+    KeyValueSet options = new KeyValueSet();
+    TableMeta meta = CatalogUtil.newTableMeta(dataFormat, options);
+
+    FileTablespace sm = TablespaceManager.getLocalFs();
+    Path tablePath = new Path(testDir, "testTextHandling.data");
+
+    Appender appender = sm.getAppender(meta, schema, tablePath);
+
+    appender.init();
+
+    VTuple tuple = new VTuple(1);
+    tuple.put(0, DatumFactory.createDate(1994,7,30));
+
+    appender.addTuple(tuple);
+    appender.flush();
+    appender.close();
+
+    FileStatus status = fs.getFileStatus(tablePath);
+    FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());
+    Scanner scanner = sm.getScanner(meta, schema, fragment, null);
+    scanner.init();
+
+    Tuple retrieved;
+    while ((retrieved = scanner.next()) != null) {
+      assertEquals(tuple.get(0).asChars(), retrieved.asDatum(0).asChars());
+    }
+    scanner.close();
+
+    if (internalType){
+      OldStorageManager.clearCache();
+    }
+  }
 }
\ No newline at end of file

Reply via email to