HIVE-18258: Vectorization: Reduce-Side GROUP BY MERGEPARTIAL with duplicate columns is broken (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f52e8b4b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f52e8b4b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f52e8b4b Branch: refs/heads/standalone-metastore Commit: f52e8b4ba38f2a1141650d99efb12c923cee7cd0 Parents: 856d88d Author: Matt McCline <[email protected]> Authored: Fri Dec 15 11:14:20 2017 -0600 Committer: Matt McCline <[email protected]> Committed: Fri Dec 15 11:14:20 2017 -0600 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../ql/exec/vector/VectorGroupKeyHelper.java | 54 +++-- .../vector_reduce_groupby_duplicate_cols.q | 29 +++ .../vector_reduce_groupby_duplicate_cols.q.out | 211 +++++++++++++++++++ .../vector_reduce_groupby_duplicate_cols.q.out | 180 ++++++++++++++++ 5 files changed, 454 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 37079b7..2bf64dc 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -381,6 +381,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_reduce2.q,\ vector_reduce3.q,\ vector_reduce_groupby_decimal.q,\ + vector_reduce_groupby_duplicate_cols.q,\ vector_row__id.q,\ vector_string_concat.q,\ vector_struct_in.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index 13a929b..02b0e5c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -32,7 +32,7 @@ import org.apache.hadoop.io.DataOutputBuffer; */ public class VectorGroupKeyHelper extends VectorColumnSetInfo { - private int[] outputColumnNums; + private int[] inputColumnNums; public VectorGroupKeyHelper(int keyCount) { super(keyCount); @@ -44,13 +44,18 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo { // case, we use the keyCount passed to the constructor and not keyExpressions.length. // Inspect the output type of each key expression. And, remember the output columns. - outputColumnNums = new int[keyCount]; + inputColumnNums = new int[keyCount]; for(int i = 0; i < keyCount; ++i) { VectorExpression keyExpression = keyExpressions[i]; + TypeInfo typeInfo = keyExpression.getOutputTypeInfo(); Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); addKey(columnVectorType); - outputColumnNums[i] = keyExpression.getOutputColumnNum(); + + // The output of the key expression is the input column. 
+ final int inputColumnNum = keyExpression.getOutputColumnNum(); + + inputColumnNums[i] = inputColumnNum; } finishAdding(); } @@ -64,10 +69,12 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo { */ public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outputBatch, DataOutputBuffer buffer) throws HiveException { + for(int i = 0; i< longIndices.length; ++i) { - final int columnIndex = outputColumnNums[longIndices[i]]; - LongColumnVector inputColumnVector = (LongColumnVector) inputBatch.cols[columnIndex]; - LongColumnVector outputColumnVector = (LongColumnVector) outputBatch.cols[columnIndex]; + final int outputColumnNum = longIndices[i]; + final int inputColumnNum = inputColumnNums[outputColumnNum]; + LongColumnVector inputColumnVector = (LongColumnVector) inputBatch.cols[inputColumnNum]; + LongColumnVector outputColumnVector = (LongColumnVector) outputBatch.cols[outputColumnNum]; // This vectorized code pattern says: // If the input batch has no nulls at all (noNulls is true) OR @@ -91,9 +98,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo { } } for(int i=0;i<doubleIndices.length; ++i) { - final int columnIndex = outputColumnNums[doubleIndices[i]]; - DoubleColumnVector inputColumnVector = (DoubleColumnVector) inputBatch.cols[columnIndex]; - DoubleColumnVector outputColumnVector = (DoubleColumnVector) outputBatch.cols[columnIndex]; + final int outputColumnNum = doubleIndices[i]; + final int inputColumnNum = inputColumnNums[outputColumnNum]; + DoubleColumnVector inputColumnVector = (DoubleColumnVector) inputBatch.cols[inputColumnNum]; + DoubleColumnVector outputColumnVector = (DoubleColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { outputColumnVector.vector[outputBatch.size] = inputColumnVector.vector[0]; } else { @@ -102,9 +110,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo { } } for(int i=0;i<stringIndices.length; ++i) { - final 
int columnIndex = outputColumnNums[stringIndices[i]]; - BytesColumnVector inputColumnVector = (BytesColumnVector) inputBatch.cols[columnIndex]; - BytesColumnVector outputColumnVector = (BytesColumnVector) outputBatch.cols[columnIndex]; + final int outputColumnNum = stringIndices[i]; + final int inputColumnNum = inputColumnNums[outputColumnNum]; + BytesColumnVector inputColumnVector = (BytesColumnVector) inputBatch.cols[inputColumnNum]; + BytesColumnVector outputColumnVector = (BytesColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { // Copy bytes into scratch buffer. int start = buffer.getLength(); @@ -121,9 +130,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo { } } for(int i=0;i<decimalIndices.length; ++i) { - final int columnIndex = outputColumnNums[decimalIndices[i]]; - DecimalColumnVector inputColumnVector = (DecimalColumnVector) inputBatch.cols[columnIndex]; - DecimalColumnVector outputColumnVector = (DecimalColumnVector) outputBatch.cols[columnIndex]; + final int outputColumnNum = decimalIndices[i]; + final int inputColumnNum = inputColumnNums[outputColumnNum]; + DecimalColumnVector inputColumnVector = (DecimalColumnVector) inputBatch.cols[inputColumnNum]; + DecimalColumnVector outputColumnVector = (DecimalColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { // Since we store references to HiveDecimalWritable instances, we must use the update method instead @@ -135,9 +145,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo { } } for(int i=0;i<timestampIndices.length; ++i) { - final int columnIndex = outputColumnNums[timestampIndices[i]]; - TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[columnIndex]; - TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[columnIndex]; + final int outputColumnNum = timestampIndices[i]; + final int 
inputColumnNum = inputColumnNums[outputColumnNum]; + TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[inputColumnNum]; + TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector); @@ -147,9 +158,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo { } } for(int i=0;i<intervalDayTimeIndices.length; ++i) { - final int columnIndex = outputColumnNums[intervalDayTimeIndices[i]]; - IntervalDayTimeColumnVector inputColumnVector = (IntervalDayTimeColumnVector) inputBatch.cols[columnIndex]; - IntervalDayTimeColumnVector outputColumnVector = (IntervalDayTimeColumnVector) outputBatch.cols[columnIndex]; + final int outputColumnNum = intervalDayTimeIndices[i]; + final int inputColumnNum = inputColumnNums[outputColumnNum]; + IntervalDayTimeColumnVector inputColumnVector = (IntervalDayTimeColumnVector) inputBatch.cols[inputColumnNum]; + IntervalDayTimeColumnVector outputColumnVector = (IntervalDayTimeColumnVector) outputBatch.cols[outputColumnNum]; if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector); http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q new file mode 100644 index 0000000..c82c960 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q @@ -0,0 +1,29 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.reduce.enabled=true; +set 
hive.vectorized.execution.reducesink.new.enabled=false; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.fetch.task.conversion=none; +set hive.strict.checks.cartesian.product=false; +set hive.cbo.enable=false; + +-- HIVE-18258 + +create table demo (one int, two int); +insert into table demo values (1, 2); + +explain vectorization detail +select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one; + +select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out new file mode 100644 index 0000000..afca3df --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out @@ -0,0 +1,211 @@ +PREHOOK: query: create table demo (one int, two int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@demo +POSTHOOK: query: create table demo (one int, two int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@demo +PREHOOK: query: insert into table demo values (1, 2) +PREHOOK: type: QUERY +PREHOOK: Output: default@demo +POSTHOOK: query: insert into table demo values (1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@demo +POSTHOOK: Lineage: demo.one EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, 
comment:), ] +POSTHOOK: Lineage: demo.two EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: explain vectorization detail +select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 1 (type: int), 2 (type: int) + sort order: ++ + Map-reduce partition columns: 1 (type: int), 2 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:one:int, 1:two:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:int)) + predicate: (one is not null and two is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 one (type: int), two (type: int) + 1 1 (type: int), 2 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [0, 1] + bigTableRetainedColumnNums: [0, 1] + bigTableValueColumnNums: [0, 1] + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1] + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: one:int, two:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:int, KEY._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int, col 1:int, col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 1, 2] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@demo +#### A masked pattern was here #### +POSTHOOK: query: select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@demo +#### A masked pattern was here #### +one_0 two one_1 +1 2 1 http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out new file mode 100644 index 0000000..eaa4031 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: create table demo (one int, two int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@demo +POSTHOOK: query: create table demo (one int, two int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@demo +PREHOOK: query: insert into table demo values (1, 2) +PREHOOK: type: QUERY +PREHOOK: Output: 
default@demo +POSTHOOK: query: insert into table demo values (1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@demo +POSTHOOK: Lineage: demo.one EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: demo.two EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: explain vectorization detail +select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + b:_dummy_table + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b:_dummy_table + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + HashTable Sink Operator + keys: + 0 one (type: int), two (type: int) + 1 1 (type: int), 2 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:one:int, 1:two:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>] + Filter Operator + Filter 
Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:int)) + predicate: (one is not null and two is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 one (type: int), two (type: int) + 1 1 (type: int), 2 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:int + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 1:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, hive.execution.engine mr IN [tez, spark] IS 
false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: one:int, two:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Local Work: + Map Reduce Local Work + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@demo +#### A masked pattern was here 
#### +POSTHOOK: query: select one as one_0, two, one as one_1 +from demo a +join (select 1 as one, 2 as two) b +on a.one = b.one and a.two = b.two +group by a.one, a.two, a.one +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@demo +#### A masked pattern was here #### +one_0 two one_1 +1 2 1
