Repository: tajo
Updated Branches:
  refs/heads/master 21c44800e -> 51198d040


TAJO-1918: Writing text type in Parquet should handle text bytes.

Closes #814

Signed-off-by: Jinho Kim <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/51198d04
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/51198d04
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/51198d04

Branch: refs/heads/master
Commit: 51198d0407e868ee788ea36a458642eebcef90c0
Parents: 21c4480
Author: Jongyoung Park <[email protected]>
Authored: Thu Oct 8 16:35:29 2015 +0900
Committer: Jinho Kim <[email protected]>
Committed: Thu Oct 8 16:35:29 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 ++
 .../java/org/apache/tajo/datum/DateDatum.java   |  5 ++
 .../org/apache/tajo/datum/TimestampDatum.java   |  5 ++
 .../tajo/storage/parquet/TajoWriteSupport.java  |  6 +--
 .../org/apache/tajo/storage/TestStorages.java   | 48 ++++++++++++++++++--
 5 files changed, 60 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/51198d04/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 8295b40..23349a7 100644
--- a/CHANGES
+++ b/CHANGES
@@ -333,6 +333,9 @@ Release 0.11.0 - unreleased
 
   BUG FIXES
 
+    TAJO-1918: Writing text type in Parquet should handle text bytes.
+    (Contributed by Jongyoung Park. Committed by jinho) 
+
     TAJO-1901: Repair partition throws ArrayIndexOutOfBoundsException 
     occasionally. (Contributed by jaehwa, committed by hyunsik)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/51198d04/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
index f69aa44..ac84e25 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java
@@ -54,6 +54,11 @@ public class DateDatum extends Datum {
     return tm;
   }
 
+  @Override
+  public byte[] asTextBytes() {
+    return asChars().getBytes(TextDatum.DEFAULT_CHARSET);
+  }
+
   public int getCenturyOfEra() {
     return asTimeMeta().getCenturyOfEra();
   }

http://git-wip-us.apache.org/repos/asf/tajo/blob/51198d04/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
index aaf7beb..5b4c152 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/TimestampDatum.java
@@ -170,6 +170,11 @@ public class TimestampDatum extends Datum {
   }
 
   @Override
+  public byte[] asTextBytes() {
+    return asChars().getBytes(TextDatum.DEFAULT_CHARSET);
+  }
+
+  @Override
   public Datum equalsTo(Datum datum) {
     if (datum.type() == TajoDataTypes.Type.TIME) {
       return timestamp == datum.asInt8() ? BooleanDatum.TRUE : BooleanDatum.FALSE;

http://git-wip-us.apache.org/repos/asf/tajo/blob/51198d04/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
index acb9015..e5ad28c 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
@@ -101,7 +101,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
       Type fieldType = fields.get(index);
       if (!tuple.isBlankOrNull(tajoIndex)) {
         recordConsumer.startField(fieldType.getName(), index);
-        writeValue(fieldType, column, tuple, tajoIndex);
+        writeValue(column, tuple, tajoIndex);
         recordConsumer.endField(fieldType.getName(), index);
       } else if (fieldType.isRepetition(Type.Repetition.REQUIRED)) {
         throw new RuntimeException("Null-value for required field: " +
@@ -111,7 +111,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
     }
   }
 
-  private void writeValue(Type fieldType, Column column, Tuple tuple, int index) {
+  private void writeValue(Column column, Tuple tuple, int index) {
     switch (column.getDataType().getType()) {
       case BOOLEAN:
         recordConsumer.addBoolean(tuple.getBool(index));
@@ -138,7 +138,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
         recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index)));
         break;
       case TEXT:
-        recordConsumer.addBinary(Binary.fromByteArray(tuple.getBytes(index)));
+        recordConsumer.addBinary(Binary.fromByteArray(tuple.getTextBytes(index)));
         break;
       case PROTOBUF:
       case BLOB:

http://git-wip-us.apache.org/repos/asf/tajo/blob/51198d04/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
index 278de45..dafaf05 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -706,7 +706,6 @@ public class TestStorages {
     appender.init();
 
     QueryId queryid = new QueryId("12345", 5);
-    ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
 
     VTuple tuple = new VTuple(new Datum[] {
         DatumFactory.createBool(true),
@@ -721,7 +720,7 @@ public class TestStorages {
         DatumFactory.createBlob("hyunsik babo".getBytes()),
         DatumFactory.createInet4("192.168.0.1"),
         NullDatum.get(),
-        factory.createDatum(queryid.getProto())
+        ProtobufDatumFactory.createDatum(queryid.getProto())
     });
     appender.addTuple(tuple);
     appender.flush();
@@ -779,7 +778,6 @@ public class TestStorages {
     appender.init();
 
     QueryId queryid = new QueryId("12345", 5);
-    ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
 
     VTuple tuple = new VTuple(13);
     tuple.put(new Datum[] {
@@ -795,7 +793,7 @@ public class TestStorages {
         DatumFactory.createBlob("hyunsik babo".getBytes()),
         DatumFactory.createInet4("192.168.0.1"),
         NullDatum.get(),
-        factory.createDatum(queryid.getProto())
+        ProtobufDatumFactory.createDatum(queryid.getProto())
     });
     appender.addTuple(tuple);
     appender.flush();
@@ -1114,4 +1112,46 @@ public class TestStorages {
 
     assertTrue(ok);
   }
+
+  @Test
+  public void testDateTextHandling() throws Exception {
+    if (dataFormat.equalsIgnoreCase(BuiltinStorages.AVRO) || internalType) {
+      return;
+    }
+
+    Schema schema = new Schema();
+    schema.addColumn("col1", Type.TEXT);
+
+    KeyValueSet options = new KeyValueSet();
+    TableMeta meta = CatalogUtil.newTableMeta(dataFormat, options);
+
+    FileTablespace sm = TablespaceManager.getLocalFs();
+    Path tablePath = new Path(testDir, "testTextHandling.data");
+
+    Appender appender = sm.getAppender(meta, schema, tablePath);
+
+    appender.init();
+
+    VTuple tuple = new VTuple(1);
+    tuple.put(0, DatumFactory.createDate(1994,7,30));
+
+    appender.addTuple(tuple);
+    appender.flush();
+    appender.close();
+
+    FileStatus status = fs.getFileStatus(tablePath);
+    FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());
+    Scanner scanner = sm.getScanner(meta, schema, fragment, null);
+    scanner.init();
+
+    Tuple retrieved;
+    while ((retrieved = scanner.next()) != null) {
+      assertEquals(tuple.get(0).asChars(), retrieved.asDatum(0).asChars());
+    }
+    scanner.close();
+
+    if (internalType){
+      OldStorageManager.clearCache();
+    }
+  }
 }
\ No newline at end of file

Reply via email to