Repository: hive
Updated Branches:
  refs/heads/master 727581d74 -> ad1243bef


HIVE-18413 : Grouping of an empty result set may only contain null values 
(Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad1243be
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad1243be
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad1243be

Branch: refs/heads/master
Commit: ad1243bef60f1b1f5f15a511761959df9abae615
Parents: 727581d
Author: Zoltan Haindrich <k...@rxd.hu>
Authored: Tue Jan 9 03:22:00 2018 -0800
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Thu Jan 18 10:21:40 2018 -0800

----------------------------------------------------------------------
 .../ql/exec/vector/VectorGroupByOperator.java   |  4 +-
 .../ql/exec/vector/VectorHashKeyWrapper.java    | 11 +++++
 .../exec/vector/VectorHashKeyWrapperBatch.java  |  4 +-
 .../clientpositive/groupby_rollup_empty.q       | 16 ++++++-
 .../clientpositive/groupby_rollup_empty.q.out   | 45 +++++++++++++++++++-
 .../llap/groupby_rollup_empty.q.out             | 45 +++++++++++++++++++-
 6 files changed, 114 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 90145e5..6dba095 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -43,8 +43,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -54,7 +52,6 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.DataOutputBuffer;
@@ -854,6 +851,7 @@ public class VectorGroupByOperator extends Operator<GroupByDesc>
       }
       if (!hasOutput && GroupByOperator.shouldEmitSummaryRow(conf)) {
         VectorHashKeyWrapper kw = keyWrappersBatch.getVectorHashKeyWrappers()[0];
+        kw.setNull();
         int pos = conf.getGroupingSetPosition();
         if (pos >= 0) {
           long val = (1 << pos) - 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
index 3e1fcdd..f9ae930 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
@@ -300,6 +300,13 @@ public class VectorHashKeyWrapper extends KeyWrapper {
     throw new UnsupportedOperationException();
   }
 
+  public void assignLong(int keyIndex, int index, long v) {
+    isNull[keyIndex] = false;
+    longValues[index] = v;
+  }
+
+  // FIXME: isNull is not updated; which might cause problems
+  @Deprecated
   public void assignLong(int index, long v) {
     longValues[index] = v;
   }
@@ -480,6 +487,10 @@ public class VectorHashKeyWrapper extends KeyWrapper {
     Arrays.fill(isNull, false);
   }
 
+  public void setNull() {
+    Arrays.fill(isNull, true);
+  }
+
   public boolean isNull(int keyIndex) {
     return isNull[keyIndex];
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
index c0b74ab..f6b9037 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
@@ -22,8 +22,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 
 /**
@@ -1032,7 +1030,7 @@ public class VectorHashKeyWrapperBatch extends VectorColumnSetInfo {
       kw.assignNullLong(keyIndex, columnTypeSpecificIndex);
       return;
     }
-    kw.assignLong(columnTypeSpecificIndex, value);
+    kw.assignLong(keyIndex, columnTypeSpecificIndex, value);
   }
 
   public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int keyIndex,

http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/test/queries/clientpositive/groupby_rollup_empty.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_rollup_empty.q b/ql/src/test/queries/clientpositive/groupby_rollup_empty.q
index 0bd5179..432d8c4 100644
--- a/ql/src/test/queries/clientpositive/groupby_rollup_empty.q
+++ b/ql/src/test/queries/clientpositive/groupby_rollup_empty.q
@@ -41,7 +41,7 @@ group by rollup (b);
 
 
 set hive.vectorized.execution.enabled=true;
-create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc;
+create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc;
 
 explain
 select  sum(c),
@@ -64,3 +64,17 @@ from    tx2
 where  a<0
 group by a,b,d grouping sets ((), b, a, d);
 
+
+insert into tx2 values
+(1,2,3,1.1,'x','b'),
+(3,2,3,1.1,'y','b');
+
+select  sum(a),
+       u,
+       bi,
+       'asd',
+        grouping(bi),
+       'NULL,1' as expected
+from    tx2
+where  a=2
+group by a,u,bi grouping sets ( u, (), bi);

http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out b/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out
index 59dc0dc..51fecb3 100644
--- a/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out
+++ b/ql/src/test/results/clientpositive/groupby_rollup_empty.q.out
@@ -111,11 +111,11 @@ POSTHOOK: Input: default@tx1
 #### A masked pattern was here ####
 1      1       1,1 and 1,0
 1      0       1,1 and 1,0
-PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc
+PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@tx2
-POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc
+POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@tx2
@@ -225,3 +225,44 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tx2
 #### A masked pattern was here ####
 NULL   NULL    asd     1       NULL,1
+PREHOOK: query: insert into tx2 values
+(1,2,3,1.1,'x','b'),
+(3,2,3,1.1,'y','b')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tx2
+POSTHOOK: query: insert into tx2 values
+(1,2,3,1.1,'x','b'),
+(3,2,3,1.1,'y','b')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tx2
+POSTHOOK: Lineage: tx2.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.bi EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.d EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.u SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+PREHOOK: query: select  sum(a),
+       u,
+       bi,
+       'asd',
+        grouping(bi),
+       'NULL,1' as expected
+from    tx2
+where  a=2
+group by a,u,bi grouping sets ( u, (), bi)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select  sum(a),
+       u,
+       bi,
+       'asd',
+        grouping(bi),
+       'NULL,1' as expected
+from    tx2
+where  a=2
+group by a,u,bi grouping sets ( u, (), bi)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+NULL   NULL    NULL    asd     1       NULL,1

http://git-wip-us.apache.org/repos/asf/hive/blob/ad1243be/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out b/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out
index 3a2d438..19a70f6 100644
--- a/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby_rollup_empty.q.out
@@ -111,11 +111,11 @@ POSTHOOK: Input: default@tx1
 #### A masked pattern was here ####
 1      1       1,1 and 1,0
 1      0       1,1 and 1,0
-PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc
+PREHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@tx2
-POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string) stored as orc
+POSTHOOK: query: create table tx2 (a integer,b integer,c integer,d double,u string,bi binary) stored as orc
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@tx2
@@ -234,3 +234,44 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tx2
 #### A masked pattern was here ####
 NULL   NULL    asd     1       NULL,1
+PREHOOK: query: insert into tx2 values
+(1,2,3,1.1,'x','b'),
+(3,2,3,1.1,'y','b')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tx2
+POSTHOOK: query: insert into tx2 values
+(1,2,3,1.1,'x','b'),
+(3,2,3,1.1,'y','b')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tx2
+POSTHOOK: Lineage: tx2.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.bi EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.d EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.u SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+PREHOOK: query: select  sum(a),
+       u,
+       bi,
+       'asd',
+        grouping(bi),
+       'NULL,1' as expected
+from    tx2
+where  a=2
+group by a,u,bi grouping sets ( u, (), bi)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select  sum(a),
+       u,
+       bi,
+       'asd',
+        grouping(bi),
+       'NULL,1' as expected
+from    tx2
+where  a=2
+group by a,u,bi grouping sets ( u, (), bi)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+NULL   NULL    NULL    asd     1       NULL,1

Reply via email to