TAJO-1465: Add ORCFileAppender to write into ORCFile table. Signed-off-by: Hyunsik Choi <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/8763d42b Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/8763d42b Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/8763d42b Branch: refs/heads/master Commit: 8763d42b90c5078232ac09bda479f97667ffb143 Parents: 8baaa48 Author: Jongyoung Park <[email protected]> Authored: Wed Sep 16 07:32:52 2015 -0700 Committer: Hyunsik Choi <[email protected]> Committed: Wed Sep 16 07:34:50 2015 -0700 ---------------------------------------------------------------------- CHANGES | 3 + .../java/org/apache/tajo/BuiltinStorages.java | 1 + .../java/org/apache/tajo/datum/DateDatum.java | 91 +- .../main/java/org/apache/tajo/datum/Datum.java | 29 +- .../org/apache/tajo/datum/DatumFactory.java | 32 +- .../java/org/apache/tajo/datum/Float4Datum.java | 8 +- .../java/org/apache/tajo/datum/Int8Datum.java | 8 +- .../java/org/apache/tajo/datum/NullDatum.java | 5 +- .../apache/tajo/datum/ProtobufDatumFactory.java | 22 +- .../java/org/apache/tajo/datum/TextDatum.java | 7 +- .../java/org/apache/tajo/datum/TimeDatum.java | 9 +- .../apache/tajo/exception/ErrorMessages.java | 1 + .../apache/tajo/exception/ExceptionUtil.java | 1 + .../tajo/exception/InvalidCastException.java | 36 - .../exception/InvalidValueForCastException.java | 39 + .../apache/tajo/exception/SQLExceptionUtil.java | 3 +- .../apache/tajo/storage/StorageConstants.java | 17 + tajo-common/src/main/proto/errors.proto | 3 +- .../org/apache/tajo/datum/TestDateDatum.java | 6 +- .../org/apache/tajo/datum/TestTimeDatum.java | 8 +- .../apache/tajo/datum/TestTimestampDatum.java | 8 +- .../tajo/engine/query/TestSelectQuery.java | 21 +- .../datetime_table_timezoned_orc_ddl.sql | 4 + .../TestSelectQuery/testTimezonedORCTable.sql | 2 + .../testTimezonedORCTable.result | 5 + .../engine/codegen/TajoGeneratorAdapter.java | 18 +- .../stream/TextFieldSerializerDeserializer.java | 2 +- 
.../storage/BinarySerializerDeserializer.java | 6 +- .../src/main/resources/storage-default.xml | 7 +- .../src/test/resources/storage-default.xml | 8 + tajo-storage/tajo-storage-hdfs/pom.xml | 1 + .../java/org/apache/tajo/storage/RawFile.java | 5 +- .../apache/tajo/storage/orc/ORCAppender.java | 130 + .../org/apache/tajo/storage/orc/ORCScanner.java | 29 +- .../objectinspector/ObjectInspectorFactory.java | 91 + .../TajoBlobObjectInspector.java | 82 + .../TajoBooleanObjectInspector.java | 76 + .../TajoDateObjectInspector.java | 73 + .../TajoDoubleObjectInspector.java | 76 + .../TajoFloatObjectInspector.java | 77 + .../objectinspector/TajoIntObjectInspector.java | 76 + .../TajoLongObjectInspector.java | 76 + .../TajoNullObjectInspector.java | 69 + .../TajoPrimitiveObjectInspector.java | 38 + .../TajoShortObjectInspector.java | 76 + .../TajoStringObjectInspector.java | 71 + .../TajoStructObjectInspector.java | 120 + .../TajoTimestampObjectInspector.java | 73 + .../apache/tajo/storage/orc/package-info.java | 95 + .../text/TextFieldSerializerDeserializer.java | 2 +- .../thirdparty/orc/BinaryColumnStatistics.java | 25 + .../storage/thirdparty/orc/BitFieldWriter.java | 69 + .../storage/thirdparty/orc/BloomFilterIO.java | 42 + .../thirdparty/orc/BooleanColumnStatistics.java | 27 + .../thirdparty/orc/ColumnStatistics.java | 36 + .../thirdparty/orc/ColumnStatisticsImpl.java | 1017 ++++++++ .../thirdparty/orc/CompressionCodec.java | 68 + .../storage/thirdparty/orc/CompressionKind.java | 27 + .../thirdparty/orc/DateColumnStatistics.java | 37 + .../thirdparty/orc/DecimalColumnStatistics.java | 45 + .../orc/DirectDecompressionCodec.java | 26 + .../thirdparty/orc/DoubleColumnStatistics.java | 44 + .../thirdparty/orc/DynamicByteArray.java | 303 +++ .../storage/thirdparty/orc/DynamicIntArray.java | 142 ++ .../thirdparty/orc/IntegerColumnStatistics.java | 50 + .../storage/thirdparty/orc/IntegerWriter.java | 47 + .../storage/thirdparty/orc/MemoryManager.java | 212 ++ 
.../tajo/storage/thirdparty/orc/Metadata.java | 45 + .../tajo/storage/thirdparty/orc/OrcConf.java | 149 ++ .../tajo/storage/thirdparty/orc/OrcFile.java | 460 ++++ .../tajo/storage/thirdparty/orc/OrcUtils.java | 201 ++ .../tajo/storage/thirdparty/orc/OutStream.java | 286 +++ .../thirdparty/orc/PositionRecorder.java | 25 + .../thirdparty/orc/PositionedOutputStream.java | 38 + .../storage/thirdparty/orc/RedBlackTree.java | 309 +++ .../thirdparty/orc/RunLengthByteWriter.java | 106 + .../thirdparty/orc/RunLengthIntegerWriter.java | 143 ++ .../orc/RunLengthIntegerWriterV2.java | 832 +++++++ .../thirdparty/orc/SerializationUtils.java | 844 +++++++ .../storage/thirdparty/orc/SnappyCodec.java | 109 + .../tajo/storage/thirdparty/orc/StreamName.java | 95 + .../thirdparty/orc/StringColumnStatistics.java | 41 + .../thirdparty/orc/StringRedBlackTree.java | 202 ++ .../thirdparty/orc/StripeInformation.java | 59 + .../thirdparty/orc/StripeStatistics.java | 42 + .../orc/TimestampColumnStatistics.java | 38 + .../tajo/storage/thirdparty/orc/Writer.java | 99 + .../tajo/storage/thirdparty/orc/WriterImpl.java | 2265 ++++++++++++++++++ .../tajo/storage/thirdparty/orc/ZlibCodec.java | 169 ++ .../storage/thirdparty/orc/package-info.java | 37 + .../thirdparty/orc/util/BloomFilter.java | 291 +++ .../storage/thirdparty/orc/util/Murmur3.java | 334 +++ .../src/main/proto/orc_proto.proto | 217 ++ .../org/apache/tajo/storage/TestStorages.java | 7 +- .../apache/tajo/storage/orc/TestORCScanner.java | 107 - .../src/test/resources/storage-default.xml | 7 +- 96 files changed, 11232 insertions(+), 288 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 2804da6..8773db5 100644 --- a/CHANGES +++ b/CHANGES @@ -625,6 +625,9 @@ Release 0.11.0 - unreleased SUB TASKS + TAJO-1465: Add ORCFileAppender to write 
into ORCFile table. + (Contributed by Jongyoung Park, committed by hyunsik) + TAJO-1841: Eliminate explicit diamond expressions in tajo-{algebra,rpc} (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/BuiltinStorages.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/BuiltinStorages.java b/tajo-common/src/main/java/org/apache/tajo/BuiltinStorages.java index 11f0287..2c8f686 100644 --- a/tajo-common/src/main/java/org/apache/tajo/BuiltinStorages.java +++ b/tajo-common/src/main/java/org/apache/tajo/BuiltinStorages.java @@ -26,6 +26,7 @@ public class BuiltinStorages { public static final String RCFILE = "RCFILE"; public static final String ROW = "ROW"; public static final String PARQUET = "PARQUET"; + public static final String ORC = "ORC"; public static final String SEQUENCE_FILE = "SEQUENCEFILE"; public static final String AVRO = "AVRO"; public static final String HBASE = "HBASE"; http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java index 093a8be..f69aa44 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/DateDatum.java @@ -21,10 +21,12 @@ package org.apache.tajo.datum; import com.google.common.primitives.Ints; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.common.TajoDataTypes.Type; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; import org.apache.tajo.exception.InvalidOperationException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.util.Bytes; import 
org.apache.tajo.util.datetime.DateTimeConstants.DateStyle; +import org.apache.tajo.util.datetime.DateTimeFormat; import org.apache.tajo.util.datetime.DateTimeUtil; import org.apache.tajo.util.datetime.TimeMeta; @@ -32,69 +34,60 @@ public class DateDatum extends Datum { public static final int SIZE = 4; // Dates are stored in UTC. - final int year; - final int monthOfYear; - final int dayOfMonth; + private int jdate; public DateDatum(int value) { - this(DateTimeUtil.j2date(value)); + super(TajoDataTypes.Type.DATE); + + jdate = value; } public DateDatum(TimeMeta tm) { super(TajoDataTypes.Type.DATE); - year = tm.years; - monthOfYear = tm.monthOfYear; - dayOfMonth = tm.dayOfMonth; + jdate = DateTimeUtil.date2j(tm.years, tm.monthOfYear, tm.dayOfMonth); } public TimeMeta asTimeMeta() { TimeMeta tm = new TimeMeta(); - tm.years = year; - tm.monthOfYear = monthOfYear; - tm.dayOfMonth = dayOfMonth; + DateTimeUtil.j2date(jdate, tm); + return tm; } public int getCenturyOfEra() { - TimeMeta tm = asTimeMeta(); - return tm.getCenturyOfEra(); + return asTimeMeta().getCenturyOfEra(); } public int getYear() { - return year; + return asTimeMeta().years; } public int getWeekyear() { - TimeMeta tm = asTimeMeta(); - return tm.getWeekyear(); + return asTimeMeta().getWeekyear(); } public int getMonthOfYear() { - return monthOfYear; + return asTimeMeta().monthOfYear; } public int getDayOfYear() { - TimeMeta tm = asTimeMeta(); - return tm.getDayOfYear(); + return asTimeMeta().getDayOfYear(); } public int getDayOfWeek() { - TimeMeta tm = asTimeMeta(); - return tm.getDayOfWeek(); + return asTimeMeta().getDayOfWeek(); } public int getISODayOfWeek() { - TimeMeta tm = asTimeMeta(); - return tm.getISODayOfWeek(); + return asTimeMeta().getISODayOfWeek(); } public int getWeekOfYear() { - TimeMeta tm = asTimeMeta(); - return tm.getWeekOfYear(); + return asTimeMeta().getWeekOfYear(); } public int getDayOfMonth() { - return dayOfMonth; + return asTimeMeta().dayOfMonth; } @Override @@ -156,9 +149,7 @@ 
public class DateDatum extends Datum { } case DATE: { DateDatum d = (DateDatum) datum; - int day1 = DateTimeUtil.date2j(year, monthOfYear, dayOfMonth); - int day2 = DateTimeUtil.date2j(d.year, d.monthOfYear, d.dayOfMonth); - return new Int4Datum(day1 - day2); + return new Int4Datum(jdate - d.jdate); } default: throw new InvalidOperationException(datum.type()); @@ -167,31 +158,31 @@ public class DateDatum extends Datum { @Override public int asInt4() { - return encode(); - } - - private int encode() { - return DateTimeUtil.date2j(year, monthOfYear, dayOfMonth); + return jdate; } @Override public long asInt8() { - return encode(); + return jdate; } @Override public float asFloat4() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(Type.DATE, Type.FLOAT4)); } @Override public double asFloat8() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(Type.DATE, Type.FLOAT8)); } @Override public String asChars() { - return DateTimeUtil.encodeDate(year, monthOfYear, dayOfMonth, DateStyle.ISO_DATES); + return DateTimeUtil.encodeDate(asTimeMeta(), DateStyle.ISO_DATES); + } + + public String toChars(String format) { + return DateTimeFormat.to_char(asTimeMeta(), format); } @Override @@ -201,7 +192,7 @@ public class DateDatum extends Datum { @Override public byte [] asByteArray() { - return Bytes.toBytes(encode()); + return Bytes.toBytes(jdate); } @Override @@ -219,15 +210,7 @@ public class DateDatum extends Datum { public int compareTo(Datum datum) { if (datum.type() == TajoDataTypes.Type.DATE) { DateDatum another = (DateDatum) datum; - int compare = Ints.compare(year, another.year); - if (compare != 0) { - return compare; - } - compare = Ints.compare(monthOfYear, another.monthOfYear); - if (compare != 0) { - return compare; - } - return Ints.compare(dayOfMonth, another.dayOfMonth); + return Ints.compare(jdate, another.jdate); } else if (datum.type() == 
TajoDataTypes.Type.TIMESTAMP) { TimestampDatum another = (TimestampDatum) datum; TimeMeta myMeta, otherMeta; @@ -243,9 +226,10 @@ public class DateDatum extends Datum { @Override public boolean equals(Object obj) { + TimeMeta tm = asTimeMeta(); if (obj instanceof DateDatum) { - DateDatum another = (DateDatum) obj; - return year == another.year && monthOfYear == another.monthOfYear && dayOfMonth == another.dayOfMonth; + TimeMeta another = ((DateDatum) obj).asTimeMeta(); + return tm.years == another.years && tm.monthOfYear == another.monthOfYear && tm.dayOfMonth == another.dayOfMonth; } else { return false; } @@ -253,10 +237,11 @@ public class DateDatum extends Datum { @Override public int hashCode() { + TimeMeta tm = asTimeMeta(); int total = 157; - total = 23 * total + year; - total = 23 * total + monthOfYear; - total = 23 * total + dayOfMonth; + total = 23 * total + tm.years; + total = 23 * total + tm.monthOfYear; + total = 23 * total + tm.dayOfMonth; return total; } http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/Datum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Datum.java index e3e1295..6aa11ce 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/Datum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/Datum.java @@ -22,8 +22,9 @@ import com.google.gson.annotations.Expose; import org.apache.tajo.SessionVars; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.conf.TajoConf.ConfVars; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; import org.apache.tajo.exception.InvalidOperationException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.json.CommonGsonHelper; import org.apache.tajo.json.GsonObject; import 
org.apache.tajo.util.datetime.TimeMeta; @@ -66,46 +67,48 @@ public abstract class Datum implements Comparable<Datum>, GsonObject { } public boolean asBool() { - throw new InvalidCastException(type, Type.BOOLEAN); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.BOOLEAN)); } public byte asByte() { - throw new InvalidCastException(type, Type.BIT); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.BIT)); } public char asChar() { - throw new InvalidCastException(type, Type.CHAR); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.CHAR)); } public short asInt2() { - throw new InvalidCastException(type, Type.INT2); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.INT2)); } + public int asInt4() { - throw new InvalidCastException(type, Type.INT4); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.INT4)); } + public long asInt8() { - throw new InvalidCastException(type, Type.INT8); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.INT8)); } public byte [] asByteArray() { - throw new InvalidCastException(type, Type.BLOB); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.BLOB)); } public float asFloat4() { - throw new InvalidCastException(type, Type.FLOAT4); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.FLOAT4)); } public double asFloat8() { - throw new InvalidCastException(type, Type.FLOAT8); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.FLOAT8)); } public String asChars() { - throw new InvalidCastException(type, Type.TEXT); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.TEXT)); } // todo remove this public char [] asUnicodeChars() { - throw new InvalidCastException(type, Type.TEXT); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.TEXT)); } public byte[] asTextBytes() 
{ @@ -113,7 +116,7 @@ public abstract class Datum implements Comparable<Datum>, GsonObject { } public TimeMeta asTimeMeta() { - throw new InvalidCastException(type, Type.INT8); + throw new TajoRuntimeException(new InvalidValueForCastException(type, Type.INT8)); } public boolean isNumeric() { http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/DatumFactory.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/DatumFactory.java b/tajo-common/src/main/java/org/apache/tajo/datum/DatumFactory.java index 7a042fa..dd4a4e4 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/DatumFactory.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/DatumFactory.java @@ -18,11 +18,13 @@ package org.apache.tajo.datum; -import com.google.protobuf.Message; import org.apache.commons.codec.binary.Base64; import org.apache.tajo.common.TajoDataTypes.DataType; import org.apache.tajo.common.TajoDataTypes.Type; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; +import org.apache.tajo.exception.TajoInternalError; +import org.apache.tajo.exception.TajoRuntimeException; +import org.apache.tajo.exception.UnsupportedDataTypeException; import org.apache.tajo.util.NumberUtil; import org.apache.tajo.util.datetime.DateTimeFormat; import org.apache.tajo.util.datetime.DateTimeUtil; @@ -71,7 +73,7 @@ public class DatumFactory { case NULL_TYPE: return NullDatum.class; default: - throw new UnsupportedOperationException(type.name()); + throw new TajoRuntimeException(new UnsupportedDataTypeException(type.name())); } } @@ -107,7 +109,7 @@ public class DatumFactory { case INET4: return createInet4(value); default: - throw new UnsupportedOperationException(dataType.toString()); + throw new TajoRuntimeException(new UnsupportedDataTypeException(dataType.toString())); } } @@ -143,17 +145,13 @@ 
public class DatumFactory { case INET4: return createInet4(bytes); case PROTOBUF: - ProtobufDatumFactory factory = ProtobufDatumFactory.get(dataType); - Message.Builder builder = factory.newBuilder(); try { - builder.mergeFrom(bytes); - return factory.createDatum(builder.build()); + return ProtobufDatumFactory.createDatum(dataType, bytes); } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException(e); + throw new TajoInternalError(e); } default: - throw new UnsupportedOperationException(dataType.toString()); + throw new TajoRuntimeException(new UnsupportedDataTypeException(dataType.toString())); } } @@ -164,7 +162,7 @@ public class DatumFactory { case DATE: return new DateDatum(val); default: - throw new UnsupportedOperationException("Cannot create " + type.getType().name() + " datum from INT4"); + throw new TajoRuntimeException(new UnsupportedDataTypeException(type.getType().name())); } } @@ -177,7 +175,7 @@ public class DatumFactory { case TIME: return createTime(val); default: - throw new UnsupportedOperationException("Cannot create " + type.getType().name() + " datum from INT8"); + throw new TajoRuntimeException(new UnsupportedDataTypeException(type.getType().name())); } } @@ -333,7 +331,7 @@ public class DatumFactory { case DATE: return (DateDatum) datum; default: - throw new InvalidCastException(datum.type(), Type.DATE); + throw new TajoRuntimeException(new InvalidValueForCastException(datum.type(), Type.DATE)); } } @@ -352,7 +350,7 @@ public class DatumFactory { case TIME: return (TimeDatum) datum; default: - throw new InvalidCastException(datum.type(), Type.TIME); + throw new TajoRuntimeException(new InvalidValueForCastException(datum.type(), Type.TIME)); } } @@ -365,7 +363,7 @@ public class DatumFactory { case TIMESTAMP: return (TimestampDatum) datum; default: - throw new InvalidCastException(datum.type(), Type.TIMESTAMP); + throw new TajoRuntimeException(new InvalidValueForCastException(datum.type(), Type.TIMESTAMP)); } } @@ -462,7 
+460,7 @@ public class DatumFactory { case ANY: return DatumFactory.createAny(operandDatum); default: - throw new InvalidCastException(operandDatum.type(), target.getType()); + throw new TajoRuntimeException(new InvalidValueForCastException(operandDatum.type(), target.getType())); } } } http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java index 0fe598a..5d56984 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java @@ -20,11 +20,11 @@ package org.apache.tajo.datum; import com.google.gson.annotations.Expose; import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; import org.apache.tajo.exception.InvalidOperationException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.util.MurmurHash; import org.apache.tajo.util.NumberUtil; -import org.apache.tajo.util.datetime.DateTimeUtil; import org.apache.tajo.util.datetime.TimeMeta; import java.nio.ByteBuffer; @@ -45,7 +45,7 @@ public class Float4Datum extends NumericDatum { } public boolean asBool() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.FLOAT4, TajoDataTypes.Type.BOOLEAN)); } @Override @@ -70,7 +70,7 @@ public class Float4Datum extends NumericDatum { @Override public byte asByte() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.FLOAT4, TajoDataTypes.Type.BIT)); } @Override 
http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java index 66c093a..86f19c7 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java @@ -20,11 +20,11 @@ package org.apache.tajo.datum; import com.google.gson.annotations.Expose; import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; import org.apache.tajo.exception.InvalidOperationException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.util.MurmurHash; import org.apache.tajo.util.NumberUtil; -import org.apache.tajo.util.datetime.DateTimeUtil; import org.apache.tajo.util.datetime.TimeMeta; import java.nio.ByteBuffer; @@ -47,7 +47,7 @@ public class Int8Datum extends NumericDatum { @Override public boolean asBool() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.INT8, TajoDataTypes.Type.BOOLEAN)); } @Override @@ -72,7 +72,7 @@ public class Int8Datum extends NumericDatum { @Override public byte asByte() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.INT8, TajoDataTypes.Type.BIT)); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/NullDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/NullDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/NullDatum.java index 0007b52..76d323b 100644 --- 
a/tajo-common/src/main/java/org/apache/tajo/datum/NullDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/NullDatum.java @@ -19,7 +19,8 @@ package org.apache.tajo.datum; import org.apache.tajo.common.TajoDataTypes.DataType; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; +import org.apache.tajo.exception.TajoRuntimeException; import static org.apache.tajo.common.TajoDataTypes.Type; @@ -58,7 +59,7 @@ public class NullDatum extends Datum { @Override public boolean asBool() { - throw new InvalidCastException(Type.NULL_TYPE, Type.BOOLEAN); + throw new TajoRuntimeException(new InvalidValueForCastException(Type.NULL_TYPE, Type.BOOLEAN)); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/ProtobufDatumFactory.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/ProtobufDatumFactory.java b/tajo-common/src/main/java/org/apache/tajo/datum/ProtobufDatumFactory.java index 0d585a4..a30e52c 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/ProtobufDatumFactory.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/ProtobufDatumFactory.java @@ -21,6 +21,7 @@ package org.apache.tajo.datum; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.google.protobuf.GeneratedMessage; +import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Message; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.protobuf.ProtobufJsonFormat; @@ -60,14 +61,30 @@ public class ProtobufDatumFactory { return (T) builder; } - public ProtobufDatum createDatum(Message.Builder builder) { + public static ProtobufDatum createDatum(Message.Builder builder) { return createDatum(builder.build()); } - public ProtobufDatum createDatum(Message message) { + public static 
ProtobufDatum createDatum(Message message) { return new ProtobufDatum(message); } + public static ProtobufDatum createDatum(String className, byte [] bytes, int offset, int length) + throws InvalidProtocolBufferException { + ProtobufDatumFactory factory = get(className); + Message.Builder builder = factory.newBuilder(); + builder.mergeFrom(bytes, offset, length); + return createDatum(builder); + } + + public static Datum createDatum(DataType type, byte[] bytes) + throws InvalidProtocolBufferException { + ProtobufDatumFactory factory = get(type); + Message.Builder builder = factory.newBuilder(); + builder.mergeFrom(bytes); + return createDatum(builder); + } + public static ProtobufDatumFactory get(DataType dataType) { Preconditions.checkArgument(dataType.getType() == TajoDataTypes.Type.PROTOBUF, "ProtobufDatumFactory only can accepts Protocol Buffer Datum Type."); @@ -88,4 +105,5 @@ public class ProtobufDatumFactory { public static String toJson(Message message) { return protobufFormatter.printToString(message); } + } http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java index ffd6ca2..df810fe 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java @@ -22,8 +22,9 @@ import com.google.common.primitives.UnsignedBytes; import com.google.gson.annotations.Expose; import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; import org.apache.tajo.exception.InvalidOperationException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.util.MurmurHash; import 
org.apache.tajo.util.StringUtils; @@ -51,12 +52,12 @@ public class TextDatum extends Datum { @Override public boolean asBool() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.TEXT, TajoDataTypes.Type.BOOLEAN)); } @Override public byte asByte() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.TEXT, TajoDataTypes.Type.BIT)); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/datum/TimeDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TimeDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TimeDatum.java index 5786657..e70d7d5 100644 --- a/tajo-common/src/main/java/org/apache/tajo/datum/TimeDatum.java +++ b/tajo-common/src/main/java/org/apache/tajo/datum/TimeDatum.java @@ -20,8 +20,9 @@ package org.apache.tajo.datum; import com.google.common.primitives.Longs; import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; import org.apache.tajo.exception.InvalidOperationException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.util.Bytes; import org.apache.tajo.util.datetime.DateTimeConstants.DateStyle; import org.apache.tajo.util.datetime.DateTimeUtil; @@ -73,7 +74,7 @@ public class TimeDatum extends Datum { @Override public int asInt4() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.TIME, TajoDataTypes.Type.INT4)); } @Override @@ -83,12 +84,12 @@ public class TimeDatum extends Datum { @Override public float asFloat4() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.TIME, 
TajoDataTypes.Type.FLOAT4)); } @Override public double asFloat8() { - throw new InvalidCastException(); + throw new TajoRuntimeException(new InvalidValueForCastException(TajoDataTypes.Type.TIME, TajoDataTypes.Type.FLOAT8)); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/exception/ErrorMessages.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/exception/ErrorMessages.java b/tajo-common/src/main/java/org/apache/tajo/exception/ErrorMessages.java index a3f18e3..9649644 100644 --- a/tajo-common/src/main/java/org/apache/tajo/exception/ErrorMessages.java +++ b/tajo-common/src/main/java/org/apache/tajo/exception/ErrorMessages.java @@ -39,6 +39,7 @@ public class ErrorMessages { ADD_MESSAGE(NOT_IMPLEMENTED, "not implemented feature: %s", 1); ADD_MESSAGE(FEATURE_NOT_SUPPORTED, "unsupported feature: %s", 1); ADD_MESSAGE(INVALID_RPC_CALL, "invalid RPC Call: %s", 1); + ADD_MESSAGE(INVALID_VALUE_FOR_CAST, "%s value cannot be casted to %s", 2); // Query Management and Scheduler ADD_MESSAGE(QUERY_FAILED, "query has been failed due to %s", 1); http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/exception/ExceptionUtil.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/exception/ExceptionUtil.java b/tajo-common/src/main/java/org/apache/tajo/exception/ExceptionUtil.java index 5a75a18..6da16ee 100644 --- a/tajo-common/src/main/java/org/apache/tajo/exception/ExceptionUtil.java +++ b/tajo-common/src/main/java/org/apache/tajo/exception/ExceptionUtil.java @@ -74,6 +74,7 @@ public class ExceptionUtil { ADD_EXCEPTION(AMBIGUOUS_FUNCTION, AmbiguousFunctionException.class); ADD_EXCEPTION(DATATYPE_MISMATCH, DataTypeMismatchException.class); + ADD_EXCEPTION(DATATYPE_MISMATCH, InvalidValueForCastException.class); 
ADD_EXCEPTION(UNAVAILABLE_TABLE_LOCATION, UnavailableTableLocationException.class); ADD_EXCEPTION(UNKNOWN_DATAFORMAT, UnknownDataFormatException.class); http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/exception/InvalidCastException.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/exception/InvalidCastException.java b/tajo-common/src/main/java/org/apache/tajo/exception/InvalidCastException.java deleted file mode 100644 index f562924..0000000 --- a/tajo-common/src/main/java/org/apache/tajo/exception/InvalidCastException.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.tajo.exception; - -import org.apache.tajo.common.TajoDataTypes; - -public class InvalidCastException extends RuntimeException { - private static final long serialVersionUID = -7689027447969916148L; - - public InvalidCastException() { - } - - public InvalidCastException(TajoDataTypes.DataType src, TajoDataTypes.DataType target) { - super(src.getType().name() + " value cannot be casted to " + target.getType().name()); - } - - public InvalidCastException(TajoDataTypes.Type src, TajoDataTypes.Type target) { - super(src.name() + " value cannot be casted to " + target.name()); - } -} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/exception/InvalidValueForCastException.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/exception/InvalidValueForCastException.java b/tajo-common/src/main/java/org/apache/tajo/exception/InvalidValueForCastException.java new file mode 100644 index 0000000..5baf7c6 --- /dev/null +++ b/tajo-common/src/main/java/org/apache/tajo/exception/InvalidValueForCastException.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.exception; + +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.error.Errors; +import org.apache.tajo.rpc.protocolrecords.PrimitiveProtos.ReturnState; + +public class InvalidValueForCastException extends TajoException { + private static final long serialVersionUID = -7689027447969916148L; + + public InvalidValueForCastException(ReturnState state) { + super(state); + } + + public InvalidValueForCastException(TajoDataTypes.DataType src, TajoDataTypes.DataType target) { + super(Errors.ResultCode.INVALID_VALUE_FOR_CAST, src.getType().name(), target.getType().name()); + } + + public InvalidValueForCastException(TajoDataTypes.Type src, TajoDataTypes.Type target) { + super(Errors.ResultCode.INVALID_VALUE_FOR_CAST, src.name(), target.name()); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/exception/SQLExceptionUtil.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/exception/SQLExceptionUtil.java b/tajo-common/src/main/java/org/apache/tajo/exception/SQLExceptionUtil.java index 0e22a87..b5708e1 100644 --- a/tajo-common/src/main/java/org/apache/tajo/exception/SQLExceptionUtil.java +++ b/tajo-common/src/main/java/org/apache/tajo/exception/SQLExceptionUtil.java @@ -25,6 +25,7 @@ import org.apache.tajo.rpc.protocolrecords.PrimitiveProtos.ReturnState; import java.sql.SQLException; import java.util.Map; +import static org.apache.tajo.error.Errors.ResultCode.INVALID_VALUE_FOR_CAST; import static org.apache.tajo.exception.ReturnStateUtil.isError; public class SQLExceptionUtil { @@ -55,7 +56,7 @@ public class SQLExceptionUtil { // Data Exception (SQLState Class - 22) SQLSTATES.put(ResultCode.DIVISION_BY_ZERO, "22012"); - + SQLSTATES.put(ResultCode.INVALID_VALUE_FOR_CAST, "22T01"); // Section: Class 42 - Syntax Error or Access Rule Violation SQLSTATES.put(ResultCode.SYNTAX_ERROR, "42601"); 
http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java index 75af292..ba0c37b 100644 --- a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java +++ b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java @@ -78,9 +78,26 @@ public class StorageConstants { public static final String DEFAULT_BINARY_SERDE = "org.apache.tajo.storage.BinarySerializerDeserializer"; public static final String DEFAULT_TEXT_SERDE = "org.apache.tajo.storage.TextSerializerDeserializer"; + // ORC file properties ------------------------------------------------- public static final String ORC_MAX_MERGE_DISTANCE = "orc.max.merge.distance"; public static final String DEFAULT_ORC_MAX_MERGE_DISTANCE = "1048576"; // 1MB + public static final String ORC_STRIPE_SIZE = "orc.stripe.size"; + public static final String DEFAULT_ORC_STRIPE_SIZE = "67108864"; // 64MB + + public static final String ORC_COMPRESSION_KIND = "orc.compression.kind"; + public static final String ORC_COMPRESSION_KIND_NONE = "none"; + public static final String ORC_COMPRESSION_KIND_SNAPPY = "snappy"; + public static final String ORC_COMPRESSION_KIND_LZO = "lzo"; + public static final String ORC_COMPRESSION_KIND_ZIP = "zlip"; + public static final String DEFAULT_ORC_COMPRESSION_KIND = ORC_COMPRESSION_KIND_NONE; + + public static final String ORC_BUFFER_SIZE = "orc.buffer.size"; + public static final String DEFAULT_ORC_BUFFER_SIZE = "262144"; // 256KB + + public static final String ORC_ROW_INDEX_STRIDE = "orc.rowindex.stride"; + public static final String DEFAULT_ORC_ROW_INDEX_STRIDE = "10000"; + // Parquet file properties ------------------------------------------------- public static final String 
PARQUET_DEFAULT_BLOCK_SIZE; public static final String PARQUET_DEFAULT_PAGE_SIZE; http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/main/proto/errors.proto ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/proto/errors.proto b/tajo-common/src/main/proto/errors.proto index f830b11..6a1780b 100644 --- a/tajo-common/src/main/proto/errors.proto +++ b/tajo-common/src/main/proto/errors.proto @@ -93,7 +93,7 @@ enum ResultCode { // Data Exception (SQLState Class - 22) DIVISION_BY_ZERO = 451; // SQLState: 22012 - Division by zero - + INVALID_VALUE_FOR_CAST = 452; // SQLState: 22T01 - Invalid data value for cast // Section: Class 42 - Syntax Error or Access Rule Violation SYNTAX_ERROR_OR_ACCESS_RULE_VIOLATION = 501; // SQLState: 42000 @@ -147,7 +147,6 @@ enum ResultCode { // Expressions INVALID_EXPRESSION = 701; - INVALID_CAST = 702; INVALID_DATATYPE = 703; NUMERIC_OVERFLOW = 803; // Numeric value overflow http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/test/java/org/apache/tajo/datum/TestDateDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/test/java/org/apache/tajo/datum/TestDateDatum.java b/tajo-common/src/test/java/org/apache/tajo/datum/TestDateDatum.java index 41b4dca..2b787f5 100644 --- a/tajo-common/src/test/java/org/apache/tajo/datum/TestDateDatum.java +++ b/tajo-common/src/test/java/org/apache/tajo/datum/TestDateDatum.java @@ -19,7 +19,7 @@ package org.apache.tajo.datum; import org.apache.tajo.common.TajoDataTypes.Type; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.json.CommonGsonHelper; import org.junit.Test; @@ -49,13 +49,13 @@ public class TestDateDatum { assertEquals(d, copy); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsFloat4() { Datum 
d = DatumFactory.createDate(DATE); d.asFloat4(); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsFloat8() { Datum d = DatumFactory.createDate(DATE); d.asFloat8(); http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/test/java/org/apache/tajo/datum/TestTimeDatum.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/test/java/org/apache/tajo/datum/TestTimeDatum.java b/tajo-common/src/test/java/org/apache/tajo/datum/TestTimeDatum.java index ea641ec..457ff41 100644 --- a/tajo-common/src/test/java/org/apache/tajo/datum/TestTimeDatum.java +++ b/tajo-common/src/test/java/org/apache/tajo/datum/TestTimeDatum.java @@ -20,7 +20,7 @@ package org.apache.tajo.datum; import org.apache.tajo.common.TajoDataTypes.Type; import org.apache.tajo.common.TajoDataTypes.DataType; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.json.CommonGsonHelper; import org.junit.Test; @@ -36,7 +36,7 @@ public class TestTimeDatum { assertEquals(Type.TIME, d.type()); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsInt4() { Datum d = DatumFactory.createTime(TIME); Datum copy = DatumFactory.createTime(d.asInt4()); @@ -50,13 +50,13 @@ public class TestTimeDatum { assertEquals(d, copy); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsFloat4() { Datum d = DatumFactory.createTime(TIME); d.asFloat4(); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsFloat8() { Datum d = DatumFactory.createTime(TIME); d.asFloat8(); http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-common/src/test/java/org/apache/tajo/datum/TestTimestampDatum.java 
---------------------------------------------------------------------- diff --git a/tajo-common/src/test/java/org/apache/tajo/datum/TestTimestampDatum.java b/tajo-common/src/test/java/org/apache/tajo/datum/TestTimestampDatum.java index 7cb3123..dc8a881 100644 --- a/tajo-common/src/test/java/org/apache/tajo/datum/TestTimestampDatum.java +++ b/tajo-common/src/test/java/org/apache/tajo/datum/TestTimestampDatum.java @@ -19,7 +19,7 @@ package org.apache.tajo.datum; import org.apache.tajo.common.TajoDataTypes.Type; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.json.CommonGsonHelper; import org.apache.tajo.util.datetime.DateTimeUtil; import org.junit.BeforeClass; @@ -50,7 +50,7 @@ public class TestTimestampDatum { assertEquals(Type.TIMESTAMP, d.type()); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsInt4() { Datum d = DatumFactory.createTimestmpDatumWithUnixTime(unixtime); d.asInt4(); @@ -63,13 +63,13 @@ public class TestTimestampDatum { assertEquals(DateTimeUtil.javaTimeToJulianTime(javaTime), d.asInt8()); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsFloat4() { Datum d = DatumFactory.createTimestmpDatumWithUnixTime(unixtime); d.asFloat4(); } - @Test(expected = InvalidCastException.class) + @Test(expected = TajoRuntimeException.class) public final void testAsFloat8() { int instance = 1386577582; Datum d = DatumFactory.createTimestmpDatumWithUnixTime(instance); http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java 
index 36c2568..130b02d 100644 --- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java +++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestSelectQuery.java @@ -671,7 +671,7 @@ public class TestSelectQuery extends QueryTestCaseBase { try { executeDDL("datetime_table_timezoned_ddl.sql", "timezoned", "timezoned_load1"); executeDDL("datetime_table_timezoned_ddl2.sql", null, "timezoned_load2"); - executeString("insert overwrite into timezoned_load2 select * from timezoned_load1"); + executeString("INSERT OVERWRITE INTO timezoned_load2 SELECT * FROM timezoned_load1"); ResultSet res = executeQuery(); assertResultSet(res, "testTimezonedTable3.result"); @@ -682,6 +682,25 @@ public class TestSelectQuery extends QueryTestCaseBase { executeString("DROP TABLE IF EXISTS timezoned_load2 PURGE"); } } + + @Test + public void testTimezonedORCTable() throws Exception { + try { + + executeDDL("datetime_table_timezoned_ddl.sql", "timezoned", "timezoned"); + executeDDL("datetime_table_timezoned_orc_ddl.sql", null, "timezoned_orc"); + + executeString("INSERT OVERWRITE INTO timezoned_orc SELECT t_timestamp, t_date FROM timezoned"); + + ResultSet res = executeQuery(); + assertResultSet(res, "testTimezonedORCTable.result"); + executeString("SET TIME ZONE 'GMT'"); + cleanupQuery(res); + } finally { + executeString("DROP TABLE IF EXISTS timezoned"); + executeString("DROP TABLE IF EXISTS timezoned_orc PURGE"); + } + } @Test public void testMultiBytesDelimiter1() throws Exception { http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql new file mode 100644 index 0000000..49e1f7e 
--- /dev/null +++ b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/datetime_table_timezoned_orc_ddl.sql @@ -0,0 +1,4 @@ +CREATE TABLE ${0} ( + t_timestamp TIMESTAMP, + t_date DATE +) USING ORC WITH ('timezone' = 'GMT+9') http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql new file mode 100644 index 0000000..1d898bd --- /dev/null +++ b/tajo-core-tests/src/test/resources/queries/TestSelectQuery/testTimezonedORCTable.sql @@ -0,0 +1,2 @@ +SET SESSION TIMEZONE = 'GMT+9'; +SELECT * FROM timezoned_orc; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result ---------------------------------------------------------------------- diff --git a/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result b/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result new file mode 100644 index 0000000..39f593b --- /dev/null +++ b/tajo-core-tests/src/test/resources/results/TestSelectQuery/testTimezonedORCTable.result @@ -0,0 +1,5 @@ +t_timestamp,t_date +------------------------------- +1980-04-01 01:50:30.01,1980-04-01 +1980-04-01 01:50:30,1980-04-01 +1980-04-01 01:50:30,1980-04-01 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java 
b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java index 2fdafa0..6c3bf17 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/codegen/TajoGeneratorAdapter.java @@ -22,7 +22,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.*; -import org.apache.tajo.exception.InvalidCastException; +import org.apache.tajo.exception.InvalidValueForCastException; import org.apache.tajo.exception.TajoRuntimeException; import org.apache.tajo.exception.UnsupportedException; import org.apache.tajo.org.objectweb.asm.Label; @@ -471,7 +471,7 @@ class TajoGeneratorAdapter { case FLOAT8: methodvisitor.visitInsn(Opcodes.I2D); break; case TEXT: emitStringValueOfChar(); break; default: - throw new InvalidCastException(srcType, targetType); + throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } } else { switch (targetRawType) { @@ -483,7 +483,7 @@ class TajoGeneratorAdapter { case FLOAT4: emitParseFloat4(); break; case FLOAT8: emitParseFloat8(); break; case TEXT: break; - default: throw new InvalidCastException(srcType, targetType); + default: throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } } break; @@ -500,7 +500,7 @@ class TajoGeneratorAdapter { case FLOAT4: methodvisitor.visitInsn(Opcodes.I2F); break; case FLOAT8: methodvisitor.visitInsn(Opcodes.I2D); break; case TEXT: emitStringValueOfInt4(); break; - default: throw new InvalidCastException(srcType, targetType); + default: throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } break; case INT8: @@ -513,7 +513,7 @@ class TajoGeneratorAdapter { case FLOAT4: methodvisitor.visitInsn(Opcodes.L2F); break; case FLOAT8: methodvisitor.visitInsn(Opcodes.L2D); break; case TEXT: emitStringValueOfInt8(); break; 
- default: throw new InvalidCastException(srcType, targetType); + default: throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } break; case FLOAT4: @@ -526,7 +526,7 @@ class TajoGeneratorAdapter { case FLOAT4: return; case FLOAT8: methodvisitor.visitInsn(Opcodes.F2D); break; case TEXT: emitStringValueOfFloat4(); break; - default: throw new InvalidCastException(srcType, targetType); + default: throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } break; case FLOAT8: @@ -539,7 +539,7 @@ class TajoGeneratorAdapter { case FLOAT4: methodvisitor.visitInsn(Opcodes.D2F); break; case FLOAT8: return; case TEXT: emitStringValueOfFloat8(); break; - default: throw new InvalidCastException(srcType, targetType); + default: throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } break; case TEXT: @@ -567,10 +567,10 @@ class TajoGeneratorAdapter { "toJulianTime", "(L" + Type.getInternalName(String.class) + ";)J"); break; } - default: throw new InvalidCastException(srcType, targetType); + default: throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } break; - default: throw new InvalidCastException(srcType, targetType); + default: throw new TajoRuntimeException(new InvalidValueForCastException(srcType, targetType)); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java index 5bd157b..74cacbe 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java +++ 
b/tajo-plan/src/main/java/org/apache/tajo/plan/function/stream/TextFieldSerializerDeserializer.java @@ -237,7 +237,7 @@ public class TextFieldSerializerDeserializer implements FieldSerializerDeseriali byte[] bytes = new byte[buf.readableBytes()]; buf.readBytes(bytes); protobufJsonFormat.merge(bytes, builder); - datum = factory.createDatum(builder.build()); + datum = ProtobufDatumFactory.createDatum(builder.build()); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java index 2cccb69..ae1e68d 100644 --- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java +++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/BinarySerializerDeserializer.java @@ -160,10 +160,8 @@ public class BinarySerializerDeserializer implements SerializerDeserializer { break; } case PROTOBUF: { - ProtobufDatumFactory factory = ProtobufDatumFactory.get(column.getDataType().getCode()); - Message.Builder builder = factory.newBuilder(); - builder.mergeFrom(bytes, offset, length); - datum = factory.createDatum(builder); + datum = ProtobufDatumFactory.createDatum(column.getDataType().getCode(), + bytes, offset, length); break; } case INET4: http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml 
b/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml index 676c072..6b10b0b 100644 --- a/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml +++ b/tajo-storage/tajo-storage-common/src/main/resources/storage-default.xml @@ -147,7 +147,7 @@ <!--- Appender Handler --> <property> <name>tajo.storage.appender-handler</name> - <value>text,raw,rcfile,row,parquet,sequencefile,avro,hbase</value> + <value>text,raw,rcfile,row,parquet,orc,sequencefile,avro,hbase</value> </property> <property> @@ -186,6 +186,11 @@ </property> <property> + <name>tajo.storage.appender-handler.orc.class</name> + <value>org.apache.tajo.storage.orc.ORCAppender</value> + </property> + + <property> <name>tajo.storage.appender-handler.sequencefile.class</name> <value>org.apache.tajo.storage.sequencefile.SequenceFileAppender</value> </property> http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml b/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml index 8a9b9ea..139133b 100644 --- a/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml +++ b/tajo-storage/tajo-storage-common/src/test/resources/storage-default.xml @@ -124,6 +124,9 @@ </property> <property> + </property> + + <property> <name>tajo.storage.scanner-handler.orc.class</name> <value>org.apache.tajo.storage.orc.ORCScanner</value> </property> @@ -185,6 +188,11 @@ </property> <property> + <name>tajo.storage.appender-handler.orc.class</name> + <value>org.apache.tajo.storage.orc.ORCAppender</value> + </property> + + <property> <name>tajo.storage.appender-handler.sequencefile.class</name> <value>org.apache.tajo.storage.sequencefile.SequenceFileAppender</value> </property> 
http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/pom.xml b/tajo-storage/tajo-storage-hdfs/pom.xml index 7740901..c3057d7 100644 --- a/tajo-storage/tajo-storage-hdfs/pom.xml +++ b/tajo-storage/tajo-storage-hdfs/pom.xml @@ -130,6 +130,7 @@ <argument>--proto_path=../../tajo-catalog/tajo-catalog-common/src/main/proto</argument> <argument>--java_out=target/generated-sources/proto</argument> <argument>src/main/proto/StorageFragmentProtos.proto</argument> + <argument>src/main/proto/orc_proto.proto</argument> </arguments> </configuration> <goals> http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java index 4994b0e..97e293c 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java @@ -338,10 +338,7 @@ public class RawFile { byte [] rawBytes = new byte[len]; buffer.get(rawBytes); - ProtobufDatumFactory factory = ProtobufDatumFactory.get(columnTypes[i]); - Message.Builder builder = factory.newBuilder(); - builder.mergeFrom(rawBytes); - outTuple.put(i, factory.createDatum(builder.build())); + outTuple.put(i, ProtobufDatumFactory.createDatum(columnTypes[i], rawBytes)); break; } http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java ---------------------------------------------------------------------- diff --git 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java new file mode 100644 index 0000000..4544ed3 --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCAppender.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.storage.orc; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.tajo.TajoConstants; +import org.apache.tajo.TaskAttemptId; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.catalog.TableMeta; +import org.apache.tajo.catalog.statistics.TableStats; +import org.apache.tajo.storage.FileAppender; +import org.apache.tajo.storage.StorageConstants; +import org.apache.tajo.storage.TableStatistics; +import org.apache.tajo.storage.Tuple; +import org.apache.tajo.storage.orc.objectinspector.ObjectInspectorFactory; +import org.apache.tajo.storage.thirdparty.orc.CompressionKind; +import org.apache.tajo.storage.thirdparty.orc.OrcFile; +import org.apache.tajo.storage.thirdparty.orc.Writer; + +import java.io.IOException; +import java.util.TimeZone; + +public class ORCAppender extends FileAppender { + private Writer writer; + private TableStatistics stats; + private TimeZone timezone; + + public ORCAppender(Configuration conf, TaskAttemptId taskAttemptId, Schema schema, + TableMeta meta, Path workDir) { + super(conf, taskAttemptId, schema, meta, workDir); + + timezone = TimeZone.getTimeZone(meta.getOption(StorageConstants.TIMEZONE, + TajoConstants.DEFAULT_SYSTEM_TIMEZONE)); + } + + @Override + public void init() throws IOException { + writer = OrcFile.createWriter(workDir.getFileSystem(conf), path, conf, + ObjectInspectorFactory.buildStructObjectInspector(schema), + Long.parseLong(meta.getOption(StorageConstants.ORC_STRIPE_SIZE, + StorageConstants.DEFAULT_ORC_STRIPE_SIZE)), getCompressionKind(), + Integer.parseInt(meta.getOption(StorageConstants.ORC_BUFFER_SIZE, + StorageConstants.DEFAULT_ORC_BUFFER_SIZE)), + Integer.parseInt(meta.getOption(StorageConstants.ORC_ROW_INDEX_STRIDE, + StorageConstants.DEFAULT_ORC_ROW_INDEX_STRIDE)), + timezone); + + if (enabledStats) { + this.stats = new TableStatistics(schema); + } + + super.init(); + } + + @Override + public long getOffset() throws 
IOException { + return 0; + } + + @Override + public void addTuple(Tuple tuple) throws IOException { + if (enabledStats) { + for (int i = 0; i < schema.size(); ++i) { + stats.analyzeField(i, tuple); + } + } + writer.addTuple(tuple); + if (enabledStats) { + stats.incrementRow(); + } + } + + @Override + public void flush() throws IOException { + } + + @Override + public void close() throws IOException { + writer.close(); + } + + @Override + public TableStats getStats() { + if (enabledStats) { + return stats.getTableStat(); + } else { + return null; + } + } + + @Override + public long getEstimatedOutputSize() throws IOException { + return writer.getRawDataSize() * writer.getNumberOfRows(); + } + + private CompressionKind getCompressionKind() { + String kindstr = meta.getOption(StorageConstants.ORC_COMPRESSION_KIND, StorageConstants.DEFAULT_ORC_COMPRESSION_KIND); + + if (kindstr.equalsIgnoreCase(StorageConstants.ORC_COMPRESSION_KIND_ZIP)) { + return CompressionKind.ZLIB; + } + + if (kindstr.equalsIgnoreCase(StorageConstants.ORC_COMPRESSION_KIND_SNAPPY)) { + return CompressionKind.SNAPPY; + } + + if (kindstr.equalsIgnoreCase(StorageConstants.ORC_COMPRESSION_KIND_LZO)) { + return CompressionKind.LZO; + } + + return CompressionKind.NONE; + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java index 1652d24..8a9d623 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/ORCScanner.java @@ -18,12 +18,14 @@ package org.apache.tajo.storage.orc; +import 
com.google.protobuf.InvalidProtocolBufferException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.tajo.TajoConstants; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.common.TajoDataTypes; @@ -46,6 +48,7 @@ import org.joda.time.DateTimeZone; import java.io.IOException; import java.util.HashSet; import java.util.Set; +import java.util.TimeZone; /** * OrcScanner for reading ORC files @@ -152,17 +155,17 @@ public class ORCScanner extends FileScanner { orcReader = new OrcReader(orcDataSource, new OrcMetadataReader()); + TimeZone timezone = TimeZone.getTimeZone(meta.getOption(StorageConstants.TIMEZONE, + TajoConstants.DEFAULT_SYSTEM_TIMEZONE)); + // TODO: make OrcPredicate useful - // TODO: TimeZone should be from conf - // TODO: it might be splittable + // presto-orc uses joda timezone, so it needs to be converted. 
recordReader = orcReader.createRecordReader(columnSet, OrcPredicate.TRUE, - fragment.getStartKey(), fragment.getLength(), DateTimeZone.getDefault()); + fragment.getStartKey(), fragment.getLength(), DateTimeZone.forTimeZone(timezone)); LOG.debug("file fragment { path: " + fragment.getPath() + ", start offset: " + fragment.getStartKey() + ", length: " + fragment.getLength() + "}"); - - getNextBatch(); } @Override @@ -243,6 +246,18 @@ public class ORCScanner extends FileScanner { return DatumFactory.createBlob(((SliceVector) vector).vector[currentPosInBatch].getBytes()); + case PROTOBUF: + try { + if (((SliceVector) vector).vector[currentPosInBatch] == null) + return NullDatum.get(); + + return ProtobufDatumFactory.createDatum(type, + ((SliceVector) vector).vector[currentPosInBatch].getBytes()); + } catch (InvalidProtocolBufferException e) { + LOG.error("ERROR", e); + return NullDatum.get(); + } + case TIMESTAMP: if (((LongVector) vector).isNull[currentPosInBatch]) return NullDatum.get(); @@ -279,6 +294,10 @@ public class ORCScanner extends FileScanner { private void getNextBatch() throws IOException { batchSize = recordReader.nextBatch(); + // end of file + if (batchSize == -1) + return; + for (int i=0; i<targetColInfo.length; i++) { recordReader.readVector(targetColInfo[i].id, vectors[i]); } http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java new file mode 100644 index 0000000..061ba0d --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/ObjectInspectorFactory.java @@ -0,0 
+1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.storage.orc.objectinspector; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.tajo.catalog.Schema; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.exception.UnsupportedException; + +public class ObjectInspectorFactory { + + public static StructObjectInspector buildStructObjectInspector(Schema schema) { + StructObjectInspector structOI = new TajoStructObjectInspector(schema); + return structOI; + } + + public static ObjectInspector buildObjectInspectorByType(TajoDataTypes.Type dataType) throws UnsupportedException { + ObjectInspector oi = null; + + switch(dataType) { + case BOOLEAN: + oi = new TajoBooleanObjectInspector(); + break; + + case INT2: + oi = new TajoShortObjectInspector(); + break; + + case INET4: + case INT4: + oi = new TajoIntObjectInspector(); + break; + + case INT8: + oi = new TajoLongObjectInspector(); + break; + + case FLOAT4: + oi = new TajoFloatObjectInspector(); + break; + + case FLOAT8: + oi = new TajoDoubleObjectInspector(); + break; + + case TEXT: + case CHAR: + oi = 
new TajoStringObjectInspector(); + break; + + case TIMESTAMP: + oi = new TajoTimestampObjectInspector(); + break; + + case DATE: + oi = new TajoDateObjectInspector(); + break; + + case BLOB: + case PROTOBUF: + oi = new TajoBlobObjectInspector(); + break; + + case NULL_TYPE: + oi = new TajoNullObjectInspector(); + break; + + default: + throw new UnsupportedException(dataType.name()+" is not supported yet in OrcAppender"); + } + + return oi; + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java new file mode 100644 index 0000000..d241f84 --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBlobObjectInspector.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.storage.orc.objectinspector; + +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.BytesWritable; +import org.apache.tajo.datum.Datum; + +public class TajoBlobObjectInspector extends TajoPrimitiveObjectInspector implements BinaryObjectInspector { + @Override + public PrimitiveTypeInfo getTypeInfo() { + return TypeInfoFactory.binaryTypeInfo; + } + + @Override + public PrimitiveCategory getPrimitiveCategory() { + return PrimitiveCategory.BINARY; + } + + @Override + public Class<?> getPrimitiveWritableClass() { + return null; + } + + @Override + public BytesWritable getPrimitiveWritableObject(Object o) { + return null; + } + + @Override + public Class<?> getJavaPrimitiveClass() { + return byte [].class; + } + + @Override + public byte[] getPrimitiveJavaObject(Object o) { + return ((Datum)o).asByteArray(); + } + + @Override + public Object copyObject(Object o) { + return null; + } + + @Override + public boolean preferWritable() { + return false; + } + + @Override + public int precision() { + return 0; + } + + @Override + public int scale() { + return 0; + } + + @Override + public String getTypeName() { + return "BINARY"; + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java new file mode 100644 index 0000000..273505f --- /dev/null +++ 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoBooleanObjectInspector.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.storage.orc.objectinspector; + +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.tajo.datum.Datum; + +public class TajoBooleanObjectInspector extends TajoPrimitiveObjectInspector implements BooleanObjectInspector { + @Override + public boolean get(Object o) { + return ((Datum)o).asBool(); + } + + @Override + public PrimitiveTypeInfo getTypeInfo() { + return TypeInfoFactory.booleanTypeInfo; + } + + @Override + public PrimitiveCategory getPrimitiveCategory() { + return PrimitiveCategory.BOOLEAN; + } + + @Override + public Class<?> getPrimitiveWritableClass() { + return null; + } + + @Override + public Object getPrimitiveWritableObject(Object o) { + return null; + } + + @Override + public Class<?> getJavaPrimitiveClass() { + return Boolean.class; + } + + @Override + public Object getPrimitiveJavaObject(Object o) { + 
return null; + } + + @Override + public Object copyObject(Object o) { + return null; + } + + @Override + public boolean preferWritable() { + return false; + } + + @Override + public String getTypeName() { + return "BOOLEAN"; + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java new file mode 100644 index 0000000..f12706b --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDateObjectInspector.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.storage.orc.objectinspector; + +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import java.sql.Date; + +public class TajoDateObjectInspector extends TajoPrimitiveObjectInspector implements DateObjectInspector { + @Override + public PrimitiveTypeInfo getTypeInfo() { + return TypeInfoFactory.dateTypeInfo; + } + + @Override + public PrimitiveCategory getPrimitiveCategory() { + return PrimitiveCategory.DATE; + } + + @Override + public Class<?> getPrimitiveWritableClass() { + return null; + } + + @Override + public DateWritable getPrimitiveWritableObject(Object o) { + return null; + } + + @Override + public Class<?> getJavaPrimitiveClass() { + return null; + } + + @Override + public Date getPrimitiveJavaObject(Object o) { + return null; + } + + @Override + public Object copyObject(Object o) { + return null; + } + + @Override + public boolean preferWritable() { + return false; + } + + @Override + public String getTypeName() { + return "DATE"; + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java new file mode 100644 index 0000000..6dc1f8c --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoDoubleObjectInspector.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.storage.orc.objectinspector; + +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.tajo.datum.Float8Datum; + +public class TajoDoubleObjectInspector extends TajoPrimitiveObjectInspector implements DoubleObjectInspector { + @Override + public double get(Object o) { + return ((Float8Datum)o).asFloat8(); + } + + @Override + public PrimitiveTypeInfo getTypeInfo() { + return TypeInfoFactory.doubleTypeInfo; + } + + @Override + public PrimitiveCategory getPrimitiveCategory() { + return PrimitiveCategory.DOUBLE; + } + + @Override + public Class<?> getPrimitiveWritableClass() { + return null; + } + + @Override + public Object getPrimitiveWritableObject(Object o) { + return null; + } + + @Override + public Class<?> getJavaPrimitiveClass() { + return Double.class; + } + + @Override + public Object getPrimitiveJavaObject(Object o) { + return null; + } + + @Override + public Object copyObject(Object o) { + return null; + } + + @Override + public boolean preferWritable() { + return false; + } + + @Override + public String getTypeName() { + return 
"DOUBLE"; + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/8763d42b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java new file mode 100644 index 0000000..8f4ffde --- /dev/null +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/orc/objectinspector/TajoFloatObjectInspector.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tajo.storage.orc.objectinspector; + +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.tajo.datum.Float4Datum; +import org.apache.tajo.datum.Int8Datum; + +public class TajoFloatObjectInspector extends TajoPrimitiveObjectInspector implements DoubleObjectInspector { + @Override + public double get(Object o) { + return ((Float4Datum)o).asFloat4(); + } + + @Override + public PrimitiveTypeInfo getTypeInfo() { + return TypeInfoFactory.floatTypeInfo; + } + + @Override + public PrimitiveCategory getPrimitiveCategory() { + return PrimitiveCategory.FLOAT; + } + + @Override + public Class<?> getPrimitiveWritableClass() { + return null; + } + + @Override + public Object getPrimitiveWritableObject(Object o) { + return null; + } + + @Override + public Class<?> getJavaPrimitiveClass() { + return Float.class; + } + + @Override + public Object getPrimitiveJavaObject(Object o) { + return null; + } + + @Override + public Object copyObject(Object o) { + return null; + } + + @Override + public boolean preferWritable() { + return false; + } + + @Override + public String getTypeName() { + return "FLOAT"; + } +}
