Repository: hive Updated Branches: refs/heads/master 727581d74 -> ad1243bef
HIVE-18413 : Grouping of an empty result set may only contain null values (Zoltan Haindrich via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad1243be Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad1243be Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad1243be Branch: refs/heads/master Commit: ad1243bef60f1b1f5f15a511761959df9abae615 Parents: 727581d Author: Zoltan Haindrich <k...@rxd.hu> Authored: Tue Jan 9 03:22:00 2018 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Thu Jan 18 10:21:40 2018 -0800 ---------------------------------------------------------------------- .../ql/exec/vector/VectorGroupByOperator.java | 4 +- .../ql/exec/vector/VectorHashKeyWrapper.java | 11 +++++ .../exec/vector/VectorHashKeyWrapperBatch.java | 4 +- .../clientpositive/groupby_rollup_empty.q | 16 ++++++- .../clientpositive/groupby_rollup_empty.q.out | 45 +++++++++++++++++++- .../llap/groupby_rollup_empty.q.out | 45 +++++++++++++++++++- 6 files changed, 114 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 90145e5..6dba095 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -43,8 +43,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -54,7 +52,6 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.DataOutputBuffer; @@ -854,6 +851,7 @@ public class VectorGroupByOperator extends Operator<GroupByDesc> } if (!hasOutput && GroupByOperator.shouldEmitSummaryRow(conf)) { VectorHashKeyWrapper kw = keyWrappersBatch.getVectorHashKeyWrappers()[0]; + kw.setNull(); int pos = conf.getGroupingSetPosition(); if (pos >= 0) { long val = (1 << pos) - 1; http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java index 3e1fcdd..f9ae930 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ -300,6 +300,13 @@ public class VectorHashKeyWrapper extends KeyWrapper { throw new UnsupportedOperationException(); } + public void assignLong(int keyIndex, int index, long v) { + isNull[keyIndex] = false; + longValues[index] = v; + } + + // FIXME: isNull is not updated; which might cause problems + @Deprecated public void assignLong(int index, long v) { longValues[index] = v; } @@ -480,6 +487,10 @@ public class VectorHashKeyWrapper extends KeyWrapper { Arrays.fill(isNull, false); } + public void setNull() { + Arrays.fill(isNull, true); + } + public boolean isNull(int keyIndex) { return isNull[keyIndex]; } http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index c0b74ab..f6b9037 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -22,8 +22,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; /** @@ -1032,7 +1030,7 @@ public class VectorHashKeyWrapperBatch extends VectorColumnSetInfo { kw.assignNullLong(keyIndex, columnTypeSpecificIndex); return; } - kw.assignLong(columnTypeSpecificIndex, value); + kw.assignLong(keyIndex, columnTypeSpecificIndex, value); } public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int keyIndex, http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/test/queries/clientpositive/groupby_rollup_empty.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_rollup_empty.q b/ql/src/test/queries/clientpositive/groupby_rollup_empty.q index 0bd5179..432d8c4 100644 --- a/ql/src/test/queries/clientpositive/groupby_rollup_empty.q +++ b/ql/src/test/queries/clientpositive/groupby_rollup_empty.q @@ -41,7 +41,7 @@ group by rollup (b); set hive.vectorized.execution.enabled=true; -create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc; +create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc; explain select sum(c), @@ -64,3 +64,17 @@ from tx2 where a<0 group by a,b,d grouping sets ((), b, a, d); + +insert into tx2 values +(1,2,3,1.1,'x','b'), +(3,2,3,1.1,'y','b'); + +select sum(a), + u, + bi, + 'asd', + grouping(bi), + 'NULL,1' as expected +from tx2 +where a=2 +group by a,u,bi grouping sets ( u, (), bi); http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out b/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out index 59dc0dc..51fecb3 100644 --- a/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out +++ b/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out @@ -111,11 +111,11 @@ POSTHOOK: Input: default@tx1 #### A masked pattern was here #### 1 1 1,1 and 1,0 1 0 1,1 and 1,0 -PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc +PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tx2 -POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc +POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tx2 @@ -225,3 +225,44 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tx2 #### A masked pattern was here #### NULL NULL asd 1 NULL,1 +PREHOOK: query: insert into tx2 values +(1,2,3,1.1,'x','b'), +(3,2,3,1.1,'y','b') +PREHOOK: type: QUERY +PREHOOK: Output: default@tx2 +POSTHOOK: query: insert into tx2 values +(1,2,3,1.1,'x','b'), +(3,2,3,1.1,'y','b') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tx2 +POSTHOOK: Lineage: tx2.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tx2.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tx2.bi EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: tx2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: tx2.d EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: tx2.u SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +PREHOOK: query: select sum(a), + u, + bi, + 'asd', + grouping(bi), + 'NULL,1' as expected +from tx2 +where a=2 +group by a,u,bi grouping sets ( u, (), bi) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(a), + u, + bi, + 'asd', + grouping(bi), + 'NULL,1' as expected +from tx2 +where a=2 +group by a,u,bi grouping sets ( u, (), bi) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx2 +#### A masked pattern was here #### +NULL NULL NULL asd 1 NULL,1 http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out b/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out index 3a2d438..19a70f6 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out @@ -111,11 +111,11 @@ POSTHOOK: Input: default@tx1 #### A masked pattern was here #### 1 1 1,1 and 1,0 1 0 1,1 and 1,0 -PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc +PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tx2 -POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc +POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tx2 @@ -234,3 +234,44 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tx2 #### A masked pattern was here #### NULL NULL asd 1 NULL,1 +PREHOOK: query: insert into tx2 values +(1,2,3,1.1,'x','b'), +(3,2,3,1.1,'y','b') +PREHOOK: type: QUERY +PREHOOK: Output: default@tx2 +POSTHOOK: query: insert into tx2 values +(1,2,3,1.1,'x','b'), +(3,2,3,1.1,'y','b') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tx2 +POSTHOOK: Lineage: tx2.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: tx2.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tx2.bi EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: tx2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: tx2.d EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: tx2.u SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +PREHOOK: query: select sum(a), + u, + bi, + 'asd', + grouping(bi), + 'NULL,1' as expected +from tx2 +where a=2 +group by a,u,bi grouping sets ( u, (), bi) +PREHOOK: type: QUERY +PREHOOK: Input: default@tx2 +#### A masked pattern was here #### +POSTHOOK: query: select sum(a), + u, + bi, + 'asd', + grouping(bi), + 'NULL,1' as expected +from tx2 +where a=2 +group by a,u,bi grouping sets ( u, (), bi) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tx2 +#### A masked pattern was here #### +NULL NULL NULL asd 1 NULL,1