Repository: tajo Updated Branches: refs/heads/index_support 761e372ba -> 28151a965
TAJO-1236: Remove slow 'new String' operation in parquet format. (jinho) Closes #292 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/661c7e21 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/661c7e21 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/661c7e21 Branch: refs/heads/index_support Commit: 661c7e216d664c71d2c889082703ecc6ae028dc3 Parents: 8f68b4b Author: jhkim <[email protected]> Authored: Tue Dec 9 14:42:02 2014 +0900 Committer: jhkim <[email protected]> Committed: Tue Dec 9 14:42:02 2014 +0900 ---------------------------------------------------------------------- CHANGES | 3 ++ .../storage/parquet/TajoRecordConverter.java | 34 ++++++++------------ .../tajo/storage/parquet/TajoWriteSupport.java | 23 +++++++------ 3 files changed, 28 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 59a8f68..89d39bc 100644 --- a/CHANGES +++ b/CHANGES @@ -18,6 +18,9 @@ Release 0.9.1 - unreleased IMPROVEMENT + TAJO-1236: Remove slow 'new String' operation in parquet format. + (jinho) + TAJO-1230: Disable ipv6 support on JVM. (Jihun Kang via hyunsik) TAJO-1213: Implement CatalogStore::updateTableStats. (jaehwa) http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java ---------------------------------------------------------------------- diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java index 7c3d79d..a091eac 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java @@ -18,29 +18,23 @@ package org.apache.tajo.storage.parquet; -import com.google.protobuf.Message; import com.google.protobuf.InvalidProtocolBufferException; - -import java.nio.ByteBuffer; - -import parquet.io.api.GroupConverter; -import parquet.io.api.Converter; -import parquet.io.api.PrimitiveConverter; -import parquet.io.api.Binary; -import parquet.schema.Type; -import parquet.schema.GroupType; - +import com.google.protobuf.Message; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.catalog.Schema; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.common.TajoDataTypes.DataType; -import org.apache.tajo.catalog.Schema; -import org.apache.tajo.catalog.Column; +import org.apache.tajo.datum.*; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.VTuple; -import org.apache.tajo.datum.DatumFactory; -import org.apache.tajo.datum.Datum; -import org.apache.tajo.datum.BlobDatum; -import org.apache.tajo.datum.NullDatum; -import org.apache.tajo.datum.ProtobufDatumFactory; +import parquet.io.api.Binary; +import parquet.io.api.Converter; +import parquet.io.api.GroupConverter; +import parquet.io.api.PrimitiveConverter; +import parquet.schema.GroupType; +import parquet.schema.Type; + +import java.nio.ByteBuffer; /** * Converter to convert a Parquet record into a Tajo Tuple. @@ -222,7 +216,7 @@ public class TajoRecordConverter extends GroupConverter { @Override final public void addBinary(Binary value) { - parent.add(DatumFactory.createChar(value.toStringUsingUTF8())); + parent.add(DatumFactory.createChar(value.getBytes())); } } @@ -343,7 +337,7 @@ public class TajoRecordConverter extends GroupConverter { @Override final public void addBinary(Binary value) { - parent.add(DatumFactory.createText(value.toStringUsingUTF8())); + parent.add(DatumFactory.createText(value.getBytes())); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java ---------------------------------------------------------------------- diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java index 35165de..8651131 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java @@ -18,10 +18,12 @@ package org.apache.tajo.storage.parquet; -import java.util.Map; -import java.util.HashMap; -import java.util.List; - +import org.apache.hadoop.conf.Configuration; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.storage.Tuple; import parquet.hadoop.api.WriteSupport; import parquet.io.api.Binary; import parquet.io.api.RecordConsumer; @@ -29,12 +31,9 @@ import parquet.schema.GroupType; import parquet.schema.MessageType; import parquet.schema.Type; -import org.apache.hadoop.conf.Configuration; -import org.apache.tajo.catalog.Schema; -import org.apache.tajo.catalog.Column; -import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.storage.Tuple; -import org.apache.tajo.datum.Datum; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Tajo implementation of {@link WriteSupport} for {@link Tuple}s. @@ -116,7 +115,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> { private void writeValue(Type fieldType, Column column, Datum datum) { switch (column.getDataType().getType()) { case BOOLEAN: - recordConsumer.addBoolean((Boolean) datum.asBool()); + recordConsumer.addBoolean(datum.asBool()); break; case BIT: case INT2: @@ -134,7 +133,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> { break; case CHAR: case TEXT: - recordConsumer.addBinary(Binary.fromString(datum.asChars())); + recordConsumer.addBinary(Binary.fromByteArray(datum.asTextBytes())); break; case PROTOBUF: case BLOB:
