http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out b/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out new file mode 100644 index 0000000..4f00bed --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_orc_string_reader_empty_dict.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orcstr +POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orcstr +PREHOOK: query: insert overwrite table orcstr select null from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert overwrite table orcstr select null from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +NULL +PREHOOK: query: insert overwrite table orcstr select "" from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@orcstr +POSTHOOK: query: insert 
overwrite table orcstr select "" from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@orcstr +POSTHOOK: Lineage: orcstr.vcol EXPRESSION [] +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### + +PREHOOK: query: select vcol from orcstr limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@orcstr +#### A masked pattern was here #### +POSTHOOK: query: select vcol from orcstr limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orcstr +#### A masked pattern was here #### +
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out index a733991..f23a359 100644 --- a/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out @@ -592,6 +592,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out new file mode 100644 index 0000000..b2bf415 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_tablesample_rows.q.out @@ -0,0 +1,307 @@ +PREHOOK: query: explain +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'key1' (type: string), 'value1' (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +_c0 _c1 +key1 value1 +PREHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: explain +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data 
size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 17.29 (type: decimal(18,9)) + outputColumnNames: _col0 + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + Execution mode: vectorized + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +_col0 +PREHOOK: query: select count(*) from decimal_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from decimal_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +c0 +1 +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: 
default@decimal_2 +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: -- Dummy tables HIVE-13190 +explain +select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +POSTHOOK: query: -- Dummy tables HIVE-13190 +explain +select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +_c0 +1 +PREHOOK: query: explain +create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain +create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: _c0 int + input format: 
org.apache.hadoop.mapred.TextInputFormat +#### A masked pattern was here #### + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + isTemporary: true + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@dual +POSTHOOK: query: create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dual +_c0 +PREHOOK: query: select * from dual +PREHOOK: type: QUERY +PREHOOK: Input: default@dual +#### A masked pattern was here #### +POSTHOOK: query: select * from dual +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dual +#### A masked pattern was here #### +dual._c0 +1 http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out index a783a36..ef92b89 100644 --- a/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out +++ b/ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out @@ -554,6 +554,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) 
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index 25f2996..7e5f24d 100644 --- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -195,7 +195,6 @@ STAGE PLANS: key expressions: 1 (type: int) sort order: + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized Reduce Operator Tree: Select Operator Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE @@ -287,7 +286,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dual - Execution mode: vectorized Stage: Stage-7 Conditional Operator http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java index c6ff748..be36ba4 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.binarysortable.fast; -import java.io.EOFException; import java.io.IOException; import java.math.BigInteger; import java.util.Arrays; @@ -29,21 +28,10 @@ import 
org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.binarysortable.InputByteBuffer; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; /* @@ -60,11 +48,9 @@ import org.apache.hadoop.io.Text; * other type specific buffers. So, those references are only valid until the next time set is * called. */ -public final class BinarySortableDeserializeRead implements DeserializeRead { +public final class BinarySortableDeserializeRead extends DeserializeRead { public static final Logger LOG = LoggerFactory.getLogger(BinarySortableDeserializeRead.class.getName()); - private TypeInfo[] typeInfos; - // The sort order (ascending/descending) for each field. Set to true when descending (invert). 
private boolean[] columnSortOrderIsDesc; @@ -76,11 +62,10 @@ public final class BinarySortableDeserializeRead implements DeserializeRead { private int start; - private DecimalTypeInfo saveDecimalTypeInfo; - private HiveDecimal saveDecimal; + private byte[] tempTimestampBytes; + private Text tempText; private byte[] tempDecimalBuffer; - private HiveDecimalWritable tempHiveDecimalWritable; private boolean readBeyondConfiguredFieldsWarned; private boolean readBeyondBufferRangeWarned; @@ -97,7 +82,7 @@ public final class BinarySortableDeserializeRead implements DeserializeRead { public BinarySortableDeserializeRead(TypeInfo[] typeInfos, boolean[] columnSortOrderIsDesc) { - this.typeInfos = typeInfos; + super(typeInfos); fieldCount = typeInfos.length; if (columnSortOrderIsDesc != null) { this.columnSortOrderIsDesc = columnSortOrderIsDesc; @@ -113,13 +98,7 @@ public final class BinarySortableDeserializeRead implements DeserializeRead { // Not public since we must have column information. private BinarySortableDeserializeRead() { - } - - /* - * The primitive type information for all fields. - */ - public TypeInfo[] typeInfos() { - return typeInfos; + super(); } /* @@ -148,42 +127,242 @@ public final class BinarySortableDeserializeRead implements DeserializeRead { if (fieldIndex >= fieldCount) { // Reading beyond the specified field count produces NULL. if (!readBeyondConfiguredFieldsWarned) { - // Warn only once. - LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " - + " reading more (NULLs returned). Ignoring similar problems."); - readBeyondConfiguredFieldsWarned = true; + doReadBeyondConfiguredFieldsWarned(); } return true; } if (inputByteBuffer.isEof()) { // Also, reading beyond our byte range produces NULL. if (!readBeyondBufferRangeWarned) { - // Warn only once. - int length = inputByteBuffer.tell() - start; - LOG.info("Reading beyond buffer range! Buffer range " + start - + " for length " + length + " but reading more... 
" - + "(total buffer length " + inputByteBuffer.getData().length + ")" - + " Ignoring similar problems."); - readBeyondBufferRangeWarned = true; + doReadBeyondBufferRangeWarned(); } // We cannot read beyond so we must return NULL here. return true; } - byte isNull = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); + byte isNullByte = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); - if (isNull == 0) { + if (isNullByte == 0) { return true; } - // We have a field and are positioned to it. + /* + * We have a field and are positioned to it. Read it. + */ + boolean isNull = false; // Assume. + switch (primitiveCategories[fieldIndex]) { + case BOOLEAN: + currentBoolean = (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) == 2); + break; + case BYTE: + currentByte = (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80); + break; + case SHORT: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + currentShort = (short) v; + } + break; + case INT: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + currentInt = v; + } + break; + case LONG: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 7; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + currentLong = v; + } + break; + case DATE: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + currentDateWritable.set(v); + } + break; + case TIMESTAMP: + { + if (tempTimestampBytes == null) { + tempTimestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH]; + } + final 
boolean invert = columnSortOrderIsDesc[fieldIndex]; + for (int i = 0; i < tempTimestampBytes.length; i++) { + tempTimestampBytes[i] = inputByteBuffer.read(invert); + } + currentTimestampWritable.setBinarySortable(tempTimestampBytes, 0); + } + break; + case FLOAT: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = 0; + for (int i = 0; i < 4; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + if ((v & (1 << 31)) == 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1 << 31); + } + currentFloat = Float.intBitsToFloat(v); + } + break; + case DOUBLE: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long v = 0; + for (int i = 0; i < 8; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + if ((v & (1L << 63)) == 0) { + // negative number, flip all bits + v = ~v; + } else { + // positive number, flip the first bit + v = v ^ (1L << 63); + } + currentDouble = Double.longBitsToDouble(v); + } + break; + case BINARY: + case STRING: + case CHAR: + case VARCHAR: + { + if (tempText == null) { + tempText = new Text(); + } + BinarySortableSerDe.deserializeText( + inputByteBuffer, columnSortOrderIsDesc[fieldIndex], tempText); + currentBytes = tempText.getBytes(); + currentBytesStart = 0; + currentBytesLength = tempText.getLength(); + } + break; + case INTERVAL_YEAR_MONTH: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int v = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); + } + currentHiveIntervalYearMonthWritable.set(v); + } + break; + case INTERVAL_DAY_TIME: + { + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + long totalSecs = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 7; i++) { + totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff); + } + int nanos = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i 
< 3; i++) { + nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff); + } + currentHiveIntervalDayTimeWritable.set(totalSecs, nanos); + } + break; + case DECIMAL: + { + // Since enforcing precision and scale can cause a HiveDecimal to become NULL, + // we must read it, enforce it here, and either return NULL or buffer the result. + + final boolean invert = columnSortOrderIsDesc[fieldIndex]; + int b = inputByteBuffer.read(invert) - 1; + assert (b == 1 || b == -1 || b == 0); + boolean positive = b != -1; + + int factor = inputByteBuffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff); + } + + if (!positive) { + factor = -factor; + } + + int start = inputByteBuffer.tell(); + int length = 0; + + do { + b = inputByteBuffer.read(positive ? invert : !invert); + assert(b != 1); + + if (b == 0) { + // end of digits + break; + } + + length++; + } while (true); + + if(tempDecimalBuffer == null || tempDecimalBuffer.length < length) { + tempDecimalBuffer = new byte[length]; + } + + inputByteBuffer.seek(start); + for (int i = 0; i < length; ++i) { + tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert); + } + + // read the null byte again + inputByteBuffer.read(positive ? invert : !invert); + + String digits = new String(tempDecimalBuffer, 0, length, BinarySortableSerDe.decimalCharSet); + BigInteger bi = new BigInteger(digits); + HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor-length); + + if (!positive) { + bd = bd.negate(); + } + + // We have a decimal. After we enforce precision and scale, will it become a NULL? 
+ + currentHiveDecimalWritable.set(bd); - if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { - return false; + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex]; + + int precision = decimalTypeInfo.getPrecision(); + int scale = decimalTypeInfo.getScale(); + + HiveDecimal decimal = currentHiveDecimalWritable.getHiveDecimal(precision, scale); + if (decimal == null) { + isNull = true; + } else { + // Put value back into writable. + currentHiveDecimalWritable.set(decimal); + } + } + break; + default: + throw new RuntimeException("Unexpected primitive type category " + primitiveCategories[fieldIndex]); + } + + /* + * Now that we have read through the field -- did we really want it? + */ + if (columnsToInclude != null && !columnsToInclude[fieldIndex]) { + isNull = true; } - // Since enforcing precision and scale may turn a HiveDecimal into a NULL, we must read - // it here. - return earlyReadHiveDecimal(); + return isNull; } /* @@ -196,7 +375,7 @@ public final class BinarySortableDeserializeRead implements DeserializeRead { // Warn only once. int length = inputByteBuffer.getEnd() - start; int remaining = inputByteBuffer.getEnd() - inputByteBuffer.tell(); - LOG.info("Not all fields were read in the buffer range! Buffer range " + start + LOG.info("Not all fields were read in the buffer range! Buffer range " + start + " for length " + length + " but " + remaining + " bytes remain. " + "(total buffer length " + inputByteBuffer.getData().length + ")" + " Ignoring similar problems."); @@ -222,526 +401,23 @@ public final class BinarySortableDeserializeRead implements DeserializeRead { } /* - * BOOLEAN. - */ - @Override - public boolean readBoolean() throws IOException { - byte b = inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]); - return (b == 2); - } - - /* - * BYTE. 
- */ - @Override - public byte readByte() throws IOException { - return (byte) (inputByteBuffer.read(columnSortOrderIsDesc[fieldIndex]) ^ 0x80); - } - - /* - * SHORT. - */ - @Override - public short readShort() throws IOException { - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - int v = inputByteBuffer.read(invert) ^ 0x80; - v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); - return (short) v; - } - - /* - * INT. - */ - @Override - public int readInt() throws IOException { - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - int v = inputByteBuffer.read(invert) ^ 0x80; - for (int i = 0; i < 3; i++) { - v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); - } - return v; - } - - /* - * LONG. - */ - @Override - public long readLong() throws IOException { - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - long v = inputByteBuffer.read(invert) ^ 0x80; - for (int i = 0; i < 7; i++) { - v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); - } - return v; - } - - /* - * FLOAT. + * Pull these out of the regular execution path. */ - @Override - public float readFloat() throws IOException { - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - int v = 0; - for (int i = 0; i < 4; i++) { - v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); - } - if ((v & (1 << 31)) == 0) { - // negative number, flip all bits - v = ~v; - } else { - // positive number, flip the first bit - v = v ^ (1 << 31); - } - return Float.intBitsToFloat(v); - } - - /* - * DOUBLE. - */ - @Override - public double readDouble() throws IOException { - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - long v = 0; - for (int i = 0; i < 8; i++) { - v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); - } - if ((v & (1L << 63)) == 0) { - // negative number, flip all bits - v = ~v; - } else { - // positive number, flip the first bit - v = v ^ (1L << 63); - } - return Double.longBitsToDouble(v); - } - - // This class is for internal use. 
- private static class BinarySortableReadStringResults extends ReadStringResults { - - // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable - // format string into. - private Text text; - - public BinarySortableReadStringResults() { - super(); - text = new Text(); - } - } - - // Reading a STRING field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different bytes field. - @Override - public ReadStringResults createReadStringResults() { - return new BinarySortableReadStringResults(); - } - - - @Override - public void readString(ReadStringResults readStringResults) throws IOException { - BinarySortableReadStringResults binarySortableReadStringResults = - (BinarySortableReadStringResults) readStringResults; - - BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadStringResults.text); - readStringResults.bytes = binarySortableReadStringResults.text.getBytes(); - readStringResults.start = 0; - readStringResults.length = binarySortableReadStringResults.text.getLength(); - } - - - /* - * CHAR. - */ - - // This class is for internal use. - private static class BinarySortableReadHiveCharResults extends ReadHiveCharResults { - - public BinarySortableReadHiveCharResults() { - super(); - } - - public HiveCharWritable getHiveCharWritable() { - return hiveCharWritable; - } - } - - // Reading a CHAR field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different CHAR field. 
- @Override - public ReadHiveCharResults createReadHiveCharResults() { - return new BinarySortableReadHiveCharResults(); - } - - public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException { - BinarySortableReadHiveCharResults binarySortableReadHiveCharResults = - (BinarySortableReadHiveCharResults) readHiveCharResults; - - if (!binarySortableReadHiveCharResults.isInit()) { - binarySortableReadHiveCharResults.init((CharTypeInfo) typeInfos[fieldIndex]); - } - - HiveCharWritable hiveCharWritable = binarySortableReadHiveCharResults.getHiveCharWritable(); - - // Decode the bytes into our Text buffer, then truncate. - BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveCharWritable.getTextValue()); - hiveCharWritable.enforceMaxLength(binarySortableReadHiveCharResults.getMaxLength()); - - readHiveCharResults.bytes = hiveCharWritable.getTextValue().getBytes(); - readHiveCharResults.start = 0; - readHiveCharResults.length = hiveCharWritable.getTextValue().getLength(); - } - - /* - * VARCHAR. - */ - - // This class is for internal use. - private static class BinarySortableReadHiveVarcharResults extends ReadHiveVarcharResults { - - public BinarySortableReadHiveVarcharResults() { - super(); - } - - public HiveVarcharWritable getHiveVarcharWritable() { - return hiveVarcharWritable; - } - } - - // Reading a VARCHAR field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different VARCHAR field. 
- @Override - public ReadHiveVarcharResults createReadHiveVarcharResults() { - return new BinarySortableReadHiveVarcharResults(); - } - - public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { - BinarySortableReadHiveVarcharResults binarySortableReadHiveVarcharResults = (BinarySortableReadHiveVarcharResults) readHiveVarcharResults; - - if (!binarySortableReadHiveVarcharResults.isInit()) { - binarySortableReadHiveVarcharResults.init((VarcharTypeInfo) typeInfos[fieldIndex]); - } - - HiveVarcharWritable hiveVarcharWritable = binarySortableReadHiveVarcharResults.getHiveVarcharWritable(); - - // Decode the bytes into our Text buffer, then truncate. - BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveVarcharWritable.getTextValue()); - hiveVarcharWritable.enforceMaxLength(binarySortableReadHiveVarcharResults.getMaxLength()); - - readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); - readHiveVarcharResults.start = 0; - readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); - } - - /* - * BINARY. - */ - - // This class is for internal use. - private static class BinarySortableReadBinaryResults extends ReadBinaryResults { - - // Use an org.apache.hadoop.io.Text object as a buffer to decode the BinarySortable - // format string into. - private Text text; - - public BinarySortableReadBinaryResults() { - super(); - text = new Text(); - } - } - - // Reading a BINARY field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different bytes field. 
- @Override - public ReadBinaryResults createReadBinaryResults() { - return new BinarySortableReadBinaryResults(); - } - - @Override - public void readBinary(ReadBinaryResults readBinaryResults) throws IOException { - BinarySortableReadBinaryResults binarySortableReadBinaryResults = - (BinarySortableReadBinaryResults) readBinaryResults; - - BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], binarySortableReadBinaryResults.text); - readBinaryResults.bytes = binarySortableReadBinaryResults.text.getBytes(); - readBinaryResults.start = 0; - readBinaryResults.length = binarySortableReadBinaryResults.text.getLength(); - } - - /* - * DATE. - */ - - // This class is for internal use. - private static class BinarySortableReadDateResults extends ReadDateResults { - - public BinarySortableReadDateResults() { - super(); - } - - public DateWritable getDateWritable() { - return dateWritable; - } - } - - // Reading a DATE field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different DATE field. - @Override - public ReadDateResults createReadDateResults() { - return new BinarySortableReadDateResults(); - } - - @Override - public void readDate(ReadDateResults readDateResults) throws IOException { - BinarySortableReadDateResults binarySortableReadDateResults = (BinarySortableReadDateResults) readDateResults; - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - int v = inputByteBuffer.read(invert) ^ 0x80; - for (int i = 0; i < 3; i++) { - v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); - } - DateWritable dateWritable = binarySortableReadDateResults.getDateWritable(); - dateWritable.set(v); - } - - /* - * TIMESTAMP. - */ - - // This class is for internal use. 
- private static class BinarySortableReadTimestampResults extends ReadTimestampResults { - - private byte[] timestampBytes; - - public BinarySortableReadTimestampResults() { - super(); - timestampBytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH]; - } - - public TimestampWritable getTimestampWritable() { - return timestampWritable; - } - } - - // Reading a TIMESTAMP field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different TIMESTAMP field. - @Override - public ReadTimestampResults createReadTimestampResults() { - return new BinarySortableReadTimestampResults(); - } - - @Override - public void readTimestamp(ReadTimestampResults readTimestampResults) throws IOException { - BinarySortableReadTimestampResults binarySortableReadTimestampResults = (BinarySortableReadTimestampResults) readTimestampResults; - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - byte[] timestampBytes = binarySortableReadTimestampResults.timestampBytes; - for (int i = 0; i < timestampBytes.length; i++) { - timestampBytes[i] = inputByteBuffer.read(invert); - } - TimestampWritable timestampWritable = binarySortableReadTimestampResults.getTimestampWritable(); - timestampWritable.setBinarySortable(timestampBytes, 0); - } - - /* - * INTERVAL_YEAR_MONTH. - */ - - // This class is for internal use. - private static class BinarySortableReadIntervalYearMonthResults extends ReadIntervalYearMonthResults { - - public BinarySortableReadIntervalYearMonthResults() { - super(); - } - - public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() { - return hiveIntervalYearMonthWritable; - } - } - - // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. - // A separate results object is created by the caller at initialization per different - // INTERVAL_YEAR_MONTH field. 
- @Override - public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() { - return new BinarySortableReadIntervalYearMonthResults(); - } - - @Override - public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResults) - throws IOException { - BinarySortableReadIntervalYearMonthResults binarySortableReadIntervalYearMonthResults = - (BinarySortableReadIntervalYearMonthResults) readIntervalYearMonthResults; - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - int v = inputByteBuffer.read(invert) ^ 0x80; - for (int i = 0; i < 3; i++) { - v = (v << 8) + (inputByteBuffer.read(invert) & 0xff); - } - HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = - binarySortableReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable(); - hiveIntervalYearMonthWritable.set(v); - } - - /* - * INTERVAL_DAY_TIME. - */ - - // This class is for internal use. - private static class BinarySortableReadIntervalDayTimeResults extends ReadIntervalDayTimeResults { - - public BinarySortableReadIntervalDayTimeResults() { - super(); - } - - public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() { - return hiveIntervalDayTimeWritable; - } - } - - // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. - // A separate results object is created by the caller at initialization per different - // INTERVAL_DAY_TIME field. 
- @Override - public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() { - return new BinarySortableReadIntervalDayTimeResults(); - } - - @Override - public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResults) - throws IOException { - BinarySortableReadIntervalDayTimeResults binarySortableReadIntervalDayTimeResults = - (BinarySortableReadIntervalDayTimeResults) readIntervalDayTimeResults; - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - long totalSecs = inputByteBuffer.read(invert) ^ 0x80; - for (int i = 0; i < 7; i++) { - totalSecs = (totalSecs << 8) + (inputByteBuffer.read(invert) & 0xff); - } - int nanos = inputByteBuffer.read(invert) ^ 0x80; - for (int i = 0; i < 3; i++) { - nanos = (nanos << 8) + (inputByteBuffer.read(invert) & 0xff); - } - HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = - binarySortableReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable(); - hiveIntervalDayTimeWritable.set(totalSecs, nanos); - } - - /* - * DECIMAL. - */ - - // This class is for internal use. - private static class BinarySortableReadDecimalResults extends ReadDecimalResults { - - public HiveDecimal hiveDecimal; - - public BinarySortableReadDecimalResults() { - super(); - } - - @Override - public void init(DecimalTypeInfo decimalTypeInfo) { - super.init(decimalTypeInfo); - } - - @Override - public HiveDecimal getHiveDecimal() { - return hiveDecimal; - } - } - - // Reading a DECIMAL field require a results object to receive value information. A separate - // results object is created by the caller at initialization per different DECIMAL field. 
- @Override - public ReadDecimalResults createReadDecimalResults() { - return new BinarySortableReadDecimalResults(); - } - - @Override - public void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException { - BinarySortableReadDecimalResults binarySortableReadDecimalResults = - (BinarySortableReadDecimalResults) readDecimalResults; - - if (!binarySortableReadDecimalResults.isInit()) { - binarySortableReadDecimalResults.init(saveDecimalTypeInfo); - } - - binarySortableReadDecimalResults.hiveDecimal = saveDecimal; - - saveDecimal = null; - saveDecimalTypeInfo = null; - } - - /** - * We read the whole HiveDecimal value and then enforce precision and scale, which may - * make it a NULL. - * @return Returns true if this HiveDecimal enforced to a NULL. - * @throws IOException - */ - private boolean earlyReadHiveDecimal() throws IOException { - - // Since enforcing precision and scale can cause a HiveDecimal to become NULL, - // we must read it, enforce it here, and either return NULL or buffer the result. - - final boolean invert = columnSortOrderIsDesc[fieldIndex]; - int b = inputByteBuffer.read(invert) - 1; - assert (b == 1 || b == -1 || b == 0); - boolean positive = b != -1; - - int factor = inputByteBuffer.read(invert) ^ 0x80; - for (int i = 0; i < 3; i++) { - factor = (factor << 8) + (inputByteBuffer.read(invert) & 0xff); - } - - if (!positive) { - factor = -factor; - } - - int start = inputByteBuffer.tell(); - int length = 0; - - do { - b = inputByteBuffer.read(positive ? invert : !invert); - assert(b != 1); - - if (b == 0) { - // end of digits - break; - } - - length++; - } while (true); - - if(tempDecimalBuffer == null || tempDecimalBuffer.length < length) { - tempDecimalBuffer = new byte[length]; - } - - inputByteBuffer.seek(start); - for (int i = 0; i < length; ++i) { - tempDecimalBuffer[i] = inputByteBuffer.read(positive ? invert : !invert); - } - - // read the null byte again - inputByteBuffer.read(positive ? 
invert : !invert); - - String digits = new String(tempDecimalBuffer, 0, length, BinarySortableSerDe.decimalCharSet); - BigInteger bi = new BigInteger(digits); - HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor-length); - - if (!positive) { - bd = bd.negate(); - } - - // We have a decimal. After we enforce precision and scale, will it become a NULL? - - if (tempHiveDecimalWritable == null) { - tempHiveDecimalWritable = new HiveDecimalWritable(); - } - tempHiveDecimalWritable.set(bd); - - saveDecimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex]; - - int precision = saveDecimalTypeInfo.getPrecision(); - int scale = saveDecimalTypeInfo.getScale(); - - saveDecimal = tempHiveDecimalWritable.getHiveDecimal(precision, scale); - // Now return whether it is NULL or NOT NULL. - return (saveDecimal == null); + private void doReadBeyondConfiguredFieldsWarned() { + // Warn only once. + LOG.info("Reading beyond configured fields! Configured " + fieldCount + " fields but " + + " reading more (NULLs returned). Ignoring similar problems."); + readBeyondConfiguredFieldsWarned = true; + } + + private void doReadBeyondBufferRangeWarned() { + // Warn only once. + int length = inputByteBuffer.tell() - start; + LOG.info("Reading beyond buffer range! Buffer range " + start + + " for length " + length + " but reading more... 
" + + "(total buffer length " + inputByteBuffer.getData().length + ")" + + " Ignoring similar problems."); + readBeyondBufferRangeWarned = true; } } http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java index c2b0cfc..2fad2af 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java @@ -19,31 +19,22 @@ package org.apache.hadoop.hive.serde2.fast; import java.io.IOException; -import java.sql.Date; -import java.sql.Timestamp; - -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import 
org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; /* * Directly deserialize with the caller reading field-by-field a serialization format. - * + * * The caller is responsible for calling the read method for the right type of each field * (after calling readCheckNull). - * + * * Reading some fields require a results object to receive value information. A separate * results object is created by the caller at initialization per different field even for the same * type. @@ -52,17 +43,88 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; * other type specific buffers. So, those references are only valid until the next time set is * called. */ -public interface DeserializeRead { +public abstract class DeserializeRead { + + protected TypeInfo[] typeInfos; + + protected boolean[] columnsToInclude; + + protected Category[] categories; + protected PrimitiveCategory[] primitiveCategories; + + public DeserializeRead(TypeInfo[] typeInfos) { + this.typeInfos = typeInfos; + final int count = typeInfos.length; + categories = new Category[count]; + primitiveCategories = new PrimitiveCategory[count]; + for (int i = 0; i < count; i++) { + TypeInfo typeInfo = typeInfos[i]; + Category category = typeInfo.getCategory(); + categories[i] = category; + if (category == Category.PRIMITIVE) { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveCategories[i] = primitiveCategory; + + switch (primitiveCategory) { + case DATE: + if (currentDateWritable == null) { + currentDateWritable = new DateWritable(); + } + break; + case TIMESTAMP: + if (currentTimestampWritable == null) { + currentTimestampWritable = new TimestampWritable(); + } + break; + case INTERVAL_YEAR_MONTH: + if (currentHiveIntervalYearMonthWritable == null) { + currentHiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); + } + break; + case INTERVAL_DAY_TIME: + if 
(currentHiveIntervalDayTimeWritable == null) { + currentHiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); + } + break; + case DECIMAL: + if (currentHiveDecimalWritable == null) { + currentHiveDecimalWritable = new HiveDecimalWritable(); + } + break; + default: + // No writable needed for this data type. + } + } + } + + columnsToInclude = null; + } + + // Don't allow for public. + protected DeserializeRead() { + } /* * The type information for all fields. */ - TypeInfo[] typeInfos(); + public TypeInfo[] typeInfos() { + return typeInfos; + } + + /* + * If some fields are not going to be used by the query, use this routine to specify + * the columns to return. The readCheckNull method will automatically return NULL for the + * other columns. + */ + public void setColumnsToInclude(boolean[] columnsToInclude) { + this.columnsToInclude = columnsToInclude; + } /* * Set the range of bytes to be deserialized. */ - void set(byte[] bytes, int offset, int length); + public abstract void set(byte[] bytes, int offset, int length); /* * Reads the NULL information for a field. @@ -70,318 +132,91 @@ public interface DeserializeRead { * @return Return true when the field is NULL; reading is positioned to the next field. * Otherwise, false when the field is NOT NULL; reading is positioned to the field data. */ - boolean readCheckNull() throws IOException; + public abstract boolean readCheckNull() throws IOException; /* * Call this method after all fields have been read to check for extra fields. */ - void extraFieldsCheck(); - + public abstract void extraFieldsCheck(); + /* * Read integrity warning flags.
*/ - boolean readBeyondConfiguredFieldsWarned(); - boolean readBeyondBufferRangeWarned(); - boolean bufferRangeHasExtraDataWarned(); + public abstract boolean readBeyondConfiguredFieldsWarned(); + public abstract boolean readBeyondBufferRangeWarned(); + public abstract boolean bufferRangeHasExtraDataWarned(); + + /* + * These members hold the current value that was read when readCheckNull returns false. + */ /* * BOOLEAN. */ - boolean readBoolean() throws IOException; + public boolean currentBoolean; /* * BYTE. */ - byte readByte() throws IOException; + public byte currentByte; /* * SHORT. */ - short readShort() throws IOException; + public short currentShort; /* * INT. */ - int readInt() throws IOException; + public int currentInt; /* * LONG. */ - long readLong() throws IOException; + public long currentLong; /* * FLOAT. */ - float readFloat() throws IOException; + public float currentFloat; /* * DOUBLE. */ - double readDouble() throws IOException; - - /* - * This class is the base abstract read bytes results for STRING, CHAR, VARCHAR, and BINARY. - */ - public abstract class ReadBytesResults { - - public byte[] bytes; - public int start; - public int length; - - public ReadBytesResults() { - bytes = null; - start = 0; - length = 0; - } - } + public double currentDouble; /* - * STRING. + * STRING, CHAR, VARCHAR, and BINARY. * - * Can be used to read CHAR and VARCHAR when the caller takes responsibility for + * For CHAR and VARCHAR when the caller takes responsibility for * truncation/padding issues. */ - - // This class is for abstract since each format may need its own specialization. - public abstract class ReadStringResults extends ReadBytesResults { - - public ReadStringResults() { - super(); - } - } - - // Reading a STRING field require a results object to receive value information. A separate - // results object is created at initialization per different bytes field.
- ReadStringResults createReadStringResults(); - - void readString(ReadStringResults readStringResults) throws IOException; - - /* - * CHAR. - */ - - // This class is for abstract since each format may need its own specialization. - public abstract class ReadHiveCharResults extends ReadBytesResults { - - private CharTypeInfo charTypeInfo; - private int maxLength; - - protected HiveCharWritable hiveCharWritable; - - public ReadHiveCharResults() { - super(); - } - - public void init(CharTypeInfo charTypeInfo) { - this.charTypeInfo = charTypeInfo; - this.maxLength = charTypeInfo.getLength(); - hiveCharWritable = new HiveCharWritable(); - } - - public boolean isInit() { - return (charTypeInfo != null); - } - - public int getMaxLength() { - return maxLength; - } - - public HiveChar getHiveChar() { - return hiveCharWritable.getHiveChar(); - } - } - - // Reading a CHAR field require a results object to receive value information. A separate - // results object is created at initialization per different CHAR field. - ReadHiveCharResults createReadHiveCharResults(); - - void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException; - - /* - * VARCHAR. - */ - - // This class is for abstract since each format may need its own specialization. 
- public abstract class ReadHiveVarcharResults extends ReadBytesResults { - - private VarcharTypeInfo varcharTypeInfo; - private int maxLength; - - protected HiveVarcharWritable hiveVarcharWritable; - - public ReadHiveVarcharResults() { - super(); - } - - public void init(VarcharTypeInfo varcharTypeInfo) { - this.varcharTypeInfo = varcharTypeInfo; - this.maxLength = varcharTypeInfo.getLength(); - hiveVarcharWritable = new HiveVarcharWritable(); - } - - public boolean isInit() { - return (varcharTypeInfo != null); - } - - public int getMaxLength() { - return maxLength; - } - - public HiveVarchar getHiveVarchar() { - return hiveVarcharWritable.getHiveVarchar(); - } - } - - // Reading a VARCHAR field require a results object to receive value information. A separate - // results object is created at initialization per different VARCHAR field. - ReadHiveVarcharResults createReadHiveVarcharResults(); - - void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException; - - /* - * BINARY. - */ - - // This class is for abstract since each format may need its own specialization. - public abstract class ReadBinaryResults extends ReadBytesResults { - - public ReadBinaryResults() { - super(); - } - } - - // Reading a BINARY field require a results object to receive value information. A separate - // results object is created at initialization per different bytes field. - ReadBinaryResults createReadBinaryResults(); - - void readBinary(ReadBinaryResults readBinaryResults) throws IOException; + public byte[] currentBytes; + public int currentBytesStart; + public int currentBytesLength; /* * DATE. */ - - // This class is for abstract since each format may need its own specialization. 
- public abstract class ReadDateResults { - - protected DateWritable dateWritable; - - public ReadDateResults() { - dateWritable = new DateWritable(); - } - - public Date getDate() { - return dateWritable.get(); - } - - public int getDays() { - return dateWritable.getDays(); - } - } - - // Reading a DATE field require a results object to receive value information. A separate - // results object is created at initialization per different DATE field. - ReadDateResults createReadDateResults(); - - void readDate(ReadDateResults readDateResults) throws IOException; + public DateWritable currentDateWritable; /* * TIMESTAMP. */ - - // This class is for abstract since each format may need its own specialization. - public abstract class ReadTimestampResults { - - protected TimestampWritable timestampWritable; - - public ReadTimestampResults() { - timestampWritable = new TimestampWritable(); - } - - public Timestamp getTimestamp() { - return timestampWritable.getTimestamp(); - } - } - - // Reading a TIMESTAMP field require a results object to receive value information. A separate - // results object is created at initialization per different TIMESTAMP field. - ReadTimestampResults createReadTimestampResults(); - - void readTimestamp(ReadTimestampResults readTimestampResult) throws IOException; + public TimestampWritable currentTimestampWritable; /* * INTERVAL_YEAR_MONTH. */ - - // This class is for abstract since each format may need its own specialization. - public abstract class ReadIntervalYearMonthResults { - - protected HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable; - - public ReadIntervalYearMonthResults() { - hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable(); - } - - public HiveIntervalYearMonth getHiveIntervalYearMonth() { - return hiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); - } - } - - // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information. 
- // A separate results object is created at initialization per different INTERVAL_YEAR_MONTH field. - ReadIntervalYearMonthResults createReadIntervalYearMonthResults(); - - void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearMonthResult) throws IOException; + public HiveIntervalYearMonthWritable currentHiveIntervalYearMonthWritable; /* * INTERVAL_DAY_TIME. */ - - // This class is for abstract since each format may need its own specialization. - public abstract class ReadIntervalDayTimeResults { - - protected HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable; - - public ReadIntervalDayTimeResults() { - hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); - } - - public HiveIntervalDayTime getHiveIntervalDayTime() { - return hiveIntervalDayTimeWritable.getHiveIntervalDayTime(); - } - } - - // Reading a INTERVAL_DAY_TIME field require a results object to receive value information. - // A separate results object is created at initialization per different INTERVAL_DAY_TIME field. - ReadIntervalDayTimeResults createReadIntervalDayTimeResults(); - - void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeResult) throws IOException; + public HiveIntervalDayTimeWritable currentHiveIntervalDayTimeWritable; /* * DECIMAL. */ - - // This class is for abstract since each format may need its own specialization. - public abstract class ReadDecimalResults { - - protected DecimalTypeInfo decimalTypeInfo; - - public ReadDecimalResults() { - } - - public void init(DecimalTypeInfo decimalTypeInfo) { - this.decimalTypeInfo = decimalTypeInfo; - } - - public boolean isInit() { - return (decimalTypeInfo != null); - } - - public abstract HiveDecimal getHiveDecimal(); - } - - // Reading a DECIMAL field require a results object to receive value information. A separate - // results object is created at initialization per different DECIMAL field. 
- ReadDecimalResults createReadDecimalResults(); - - void readHiveDecimal(ReadDecimalResults readDecimalResults) throws IOException; + public HiveDecimalWritable currentHiveDecimalWritable; } \ No newline at end of file
