[02/14] hive git commit: HIVE-18079 : Statistics: Allow HyperLogLog to be merged to the lowest-common-denominator bit-size (Gopal V via Prasanth J)

hashutosh Thu, 07 Jun 2018 23:00:27 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out 
b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
index 68aabb0..4a10953 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
@@ -193,48 +193,48 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 4 
             Map Operator Tree:
                 TableScan
-                  alias: c
+                  alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col1 (type: string)
+                        key expressions: _col0 (type: int)
                         sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
@@ -242,28 +242,28 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: string)
+                  key expressions: _col0 (type: int)
                   sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int)
+                  value expressions: _col2 (type: int)
         Reducer 3 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col0, _col3
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col3 (type: int)
+                  expressions: _col0 (type: int), _col2 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
@@ -309,48 +309,48 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 4 
             Map Operator Tree:
                 TableScan
-                  alias: c
+                  alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col1 (type: string)
+                        key expressions: _col0 (type: int)
                         sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
@@ -358,28 +358,28 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: string)
+                  key expressions: _col0 (type: int)
                   sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int)
+                  value expressions: _col2 (type: int)
         Reducer 3 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col0, _col3
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col3 (type: int)
+                  expressions: _col0 (type: int), _col2 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
@@ -1906,48 +1906,48 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 4 
             Map Operator Tree:
                 TableScan
-                  alias: c
+                  alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col1 (type: string)
+                        key expressions: _col0 (type: int)
                         sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
@@ -1955,28 +1955,28 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: string)
+                  key expressions: _col0 (type: int)
                   sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int)
+                  value expressions: _col2 (type: int)
         Reducer 3 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col0, _col3
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col3 (type: int)
+                  expressions: _col0 (type: int), _col2 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator
@@ -2023,48 +2023,48 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 4 
             Map Operator Tree:
                 TableScan
-                  alias: c
+                  alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col1 (type: string)
+                        key expressions: _col0 (type: int)
                         sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
@@ -2072,28 +2072,28 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col1 (type: string)
+                  key expressions: _col0 (type: int)
                   sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
+                  Map-reduce partition columns: _col0 (type: int)
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int)
+                  value expressions: _col2 (type: int)
         Reducer 3 
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
-                  1 _col1 (type: string)
-                outputColumnNames: _col0, _col3
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col2
                 Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col3 (type: int)
+                  expressions: _col0 (type: int), _col2 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
                   File Output Operator


http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out 
b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
index b1363f0..ddd6bd1 100644
--- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
+++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out
@@ -488,26 +488,25 @@ JOIN src y ON (x.key = y.key)
 JOIN src1 z ON (x.key = z.key)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-4 is a root stage
-  Stage-3 depends on stages: Stage-4
+  Stage-3 is a root stage
   Stage-1 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
-  Stage: Stage-4
+  Stage: Stage-3
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 2 
+        Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: z
+                  alias: x
                   Statistics: Num rows: 25 Data size: 191 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: key is not null (type: boolean)
+                    predicate: (key is not null and value is not null) (type: 
boolean)
                     Statistics: Num rows: 25 Data size: 191 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
@@ -517,7 +516,8 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
-                        Position of Big Table: 0
+                          2 _col0 (type: string)
+                        Position of Big Table: 2
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -573,42 +573,27 @@ STAGE PLANS:
                     name: default.src1
                   name: default.src1
             Truncated Path -> Alias:
-              /src1 [$hdt$_3:z]
-
-  Stage: Stage-3
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
+              /src1 [$hdt$_2:x]
+        Map 2 
             Map Operator Tree:
                 TableScan
-                  alias: x
+                  alias: z
                   Statistics: Num rows: 25 Data size: 191 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (key is not null and value is not null) (type: 
boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 25 Data size: 191 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 25 Data size: 191 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
+                      Spark HashTable Sink Operator
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
-                        outputColumnNames: _col0, _col1, _col3
-                        input vertices:
-                          1 Map 2
-                        Position of Big Table: 0
-                        Statistics: Num rows: 27 Data size: 210 Basic stats: 
COMPLETE Column stats: NONE
-                        Spark HashTable Sink Operator
-                          keys:
-                            0 _col1 (type: string)
-                            1 _col0 (type: string)
-                          Position of Big Table: 1
+                          2 _col0 (type: string)
+                        Position of Big Table: 2
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -664,24 +649,24 @@ STAGE PLANS:
                     name: default.src1
                   name: default.src1
             Truncated Path -> Alias:
-              /src1 [$hdt$_2:x]
+              /src1 [$hdt$_3:z]
         Map 4 
             Map Operator Tree:
                 TableScan
-                  alias: y
+                  alias: w
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
+                      expressions: value (type: string)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: string)
+                          0 _col1 (type: string)
                           1 _col0 (type: string)
                         Position of Big Table: 0
             Execution mode: vectorized
@@ -739,7 +724,7 @@ STAGE PLANS:
                     name: default.src
                   name: default.src
             Truncated Path -> Alias:
-              /src [$hdt$_0:y]
+              /src [$hdt$_0:w]
 
   Stage: Stage-1
     Spark
@@ -748,49 +733,52 @@ STAGE PLANS:
         Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: w
+                  alias: y
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
+                             Inner Join 0 to 2
                         keys:
-                          0 _col1 (type: string)
+                          0 _col0 (type: string)
                           1 _col0 (type: string)
-                        outputColumnNames: _col0, _col3
+                          2 _col0 (type: string)
+                        outputColumnNames: _col0, _col1, _col3, _col5
                         input vertices:
                           0 Map 1
-                        Position of Big Table: 1
-                        Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
+                          1 Map 2
+                        Position of Big Table: 2
+                        Statistics: Num rows: 1100 Data size: 11686 Basic 
stats: COMPLETE Column stats: NONE
                         Map Join Operator
                           condition map:
                                Inner Join 0 to 1
                           keys:
-                            0 _col0 (type: string)
+                            0 _col1 (type: string)
                             1 _col0 (type: string)
-                          outputColumnNames: _col0, _col3, _col6
+                          outputColumnNames: _col0, _col3, _col5
                           input vertices:
                             1 Map 4
                           Position of Big Table: 0
-                          Statistics: Num rows: 605 Data size: 6427 Basic 
stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1210 Data size: 12854 Basic 
stats: COMPLETE Column stats: NONE
                           Select Operator
-                            expressions: _col0 (type: string), _col3 (type: 
string), _col6 (type: string)
+                            expressions: _col0 (type: string), _col3 (type: 
string), _col5 (type: string)
                             outputColumnNames: _col0, _col1, _col2
-                            Statistics: Num rows: 605 Data size: 6427 Basic 
stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 1210 Data size: 12854 Basic 
stats: COMPLETE Column stats: NONE
                             File Output Operator
                               compressed: false
                               GlobalTableId: 1
 #### A masked pattern was here ####
                               NumFilesPerFileSink: 1
-                              Statistics: Num rows: 605 Data size: 6427 Basic 
stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 1210 Data size: 12854 
Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                               table:
                                   input format: 
org.apache.hadoop.mapred.TextInputFormat
@@ -873,7 +861,7 @@ STAGE PLANS:
                     name: default.src
                   name: default.src
             Truncated Path -> Alias:
-              /src [$hdt$_1:w]
+              /src [$hdt$_1:y]
 
   Stage: Stage-0
     Move Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out 
b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
index ec632a6..fff2f31 100644
--- a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
@@ -566,18 +566,18 @@ STAGE PLANS:
         Map 5 
             Map Operator Tree:
                 TableScan
-                  alias: src1
+                  alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: string)
+                      expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: string)
+                          0 _col1 (type: string)
                           1 _col0 (type: string)
             Execution mode: vectorized
             Local Work:
@@ -603,22 +603,22 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col1 (type: string)
+                        key expressions: _col0 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 2000 Data size: 21248 Basic 
stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col2 (type: 
string)
+                        value expressions: _col1 (type: string), _col2 (type: 
string)
             Execution mode: vectorized
         Map 4 
             Map Operator Tree:
                 TableScan
-                  alias: src
+                  alias: src1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -635,15 +635,15 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
+                  0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col2
+                outputColumnNames: _col1, _col2
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: 
COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
-                    0 _col0 (type: string)
+                    0 _col1 (type: string)
                     1 _col0 (type: string)
                   outputColumnNames: _col2
                   input vertices:

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out 
b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
index 103491d..85d0b8a 100644
--- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out
@@ -1665,9 +1665,9 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_19]
-        Select Operator [SEL_18] (rows=366 width=178)
+        Select Operator [SEL_18] (rows=365 width=178)
           Output:["_col0","_col1"]
-          Filter Operator [FIL_17] (rows=366 width=179)
+          Filter Operator [FIL_17] (rows=365 width=179)
             predicate:_col3 is null
             Join Operator [JOIN_16] (rows=500 width=179)
               Output:["_col0","_col1","_col3"],condition 
map:[{"":"{\"type\":\"Left 
Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"}
@@ -1730,9 +1730,9 @@ Stage-0
     Stage-1
       Reducer 3
       File Output Operator [FS_18]
-        Select Operator [SEL_17] (rows=183 width=178)
+        Select Operator [SEL_17] (rows=185 width=178)
           Output:["_col0","_col1"]
-          Filter Operator [FIL_16] (rows=183 width=179)
+          Filter Operator [FIL_16] (rows=185 width=179)
             predicate:_col4 is null
             Join Operator [JOIN_15] (rows=250 width=179)
               Output:["_col0","_col1","_col4"],condition 
map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, 
_col1","1":"_col0, _col1"}
@@ -1806,7 +1806,7 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_12]
-        Join Operator [JOIN_10] (rows=133 width=178)
+        Join Operator [JOIN_10] (rows=131 width=178)
           Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left 
Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"}
         <-Map 1 [PARTITION-LEVEL SORT]
           PARTITION-LEVEL SORT [RS_8]
@@ -1858,7 +1858,7 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_12]
-        Join Operator [JOIN_10] (rows=133 width=178)
+        Join Operator [JOIN_10] (rows=131 width=178)
           Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left 
Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"}
         <-Map 1 [PARTITION-LEVEL SORT]
           PARTITION-LEVEL SORT [RS_8]
@@ -1900,7 +1900,7 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_12]
-        Join Operator [JOIN_10] (rows=133 width=178)
+        Join Operator [JOIN_10] (rows=131 width=178)
           Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left 
Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
         <-Map 1 [PARTITION-LEVEL SORT]
           PARTITION-LEVEL SORT [RS_8]
@@ -2021,16 +2021,16 @@ Stage-0
         <-Reducer 3 [PARTITION-LEVEL SORT]
           PARTITION-LEVEL SORT [RS_27]
             PartitionCols:_col2
-            Filter Operator [FIL_15] (rows=66 width=186)
+            Filter Operator [FIL_15] (rows=65 width=186)
               predicate:_col2 is not null
-              Group By Operator [GBY_14] (rows=66 width=186)
+              Group By Operator [GBY_14] (rows=65 width=186)
                 
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0,
 KEY._col1
               <-Reducer 2 [GROUP]
                 GROUP [RS_13]
                   PartitionCols:_col0, _col1
-                  Group By Operator [GBY_12] (rows=66 width=186)
+                  Group By Operator [GBY_12] (rows=65 width=186)
                     
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1
-                    Join Operator [JOIN_10] (rows=133 width=178)
+                    Join Operator [JOIN_10] (rows=131 width=178)
                       Output:["_col0","_col1"],condition 
map:[{"":"{\"type\":\"Left 
Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
                     <-Map 1 [PARTITION-LEVEL SORT]
                       PARTITION-LEVEL SORT [RS_8]
@@ -2634,7 +2634,7 @@ Stage-0
                     PartitionCols:_col0
                     Group By Operator [GBY_10] (rows=16 width=94)
                       
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                      Join Operator [JOIN_8] (rows=40 width=86)
+                      Join Operator [JOIN_8] (rows=39 width=86)
                         Output:["_col0"],condition 
map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
                       <-Map 1 [PARTITION-LEVEL SORT]
                         PARTITION-LEVEL SORT [RS_6]
@@ -2695,7 +2695,7 @@ Stage-0
                     PartitionCols:_col0
                     Group By Operator [GBY_10] (rows=16 width=94)
                       
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                      Join Operator [JOIN_8] (rows=40 width=86)
+                      Join Operator [JOIN_8] (rows=39 width=86)
                         Output:["_col0"],condition 
map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
                       <-Map 1 [PARTITION-LEVEL SORT]
                         PARTITION-LEVEL SORT [RS_6]
@@ -2755,7 +2755,7 @@ Stage-0
                     PartitionCols:_col0
                     Group By Operator [GBY_10] (rows=16 width=94)
                       
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                      Map Join Operator [MAPJOIN_22] (rows=40 width=86)
+                      Map Join Operator [MAPJOIN_22] (rows=39 width=86)
                         Conds:SEL_5._col0=SEL_5._col0(Inner),Output:["_col0"]
                       <-Select Operator [SEL_5] (rows=500 width=87)
                           Output:["_col0"]
@@ -2807,16 +2807,16 @@ Stage-0
           GROUP [RS_18]
             Group By Operator [GBY_17] (rows=1 width=16)
               Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"]
-              Select Operator [SEL_15] (rows=10 width=94)
+              Select Operator [SEL_15] (rows=9 width=94)
                 Output:["_col0","_col1"]
-                Group By Operator [GBY_14] (rows=10 width=94)
+                Group By Operator [GBY_14] (rows=9 width=94)
                   
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
                 <-Reducer 2 [GROUP]
                   GROUP [RS_13]
                     PartitionCols:_col0
-                    Group By Operator [GBY_12] (rows=10 width=94)
+                    Group By Operator [GBY_12] (rows=9 width=94)
                       
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
-                      Join Operator [JOIN_10] (rows=20 width=86)
+                      Join Operator [JOIN_10] (rows=19 width=86)
                         Output:["_col0"],condition map:[{"":"{\"type\":\"Left 
Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
                       <-Map 1 [PARTITION-LEVEL SORT]
                         PARTITION-LEVEL SORT [RS_8]
@@ -5215,9 +5215,9 @@ Stage-2
               Reducer 2
               File Output Operator [FS_11]
                 table:{"name:":"default.dest_j1_n14"}
-                Select Operator [SEL_9] (rows=809 width=95)
+                Select Operator [SEL_9] (rows=791 width=95)
                   Output:["_col0","_col1"]
-                  Join Operator [JOIN_8] (rows=809 width=178)
+                  Join Operator [JOIN_8] (rows=791 width=178)
                     Output:["_col0","_col2"],condition 
map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
                   <-Map 1 [PARTITION-LEVEL SORT]
                     PARTITION-LEVEL SORT [RS_6]
@@ -5241,7 +5241,7 @@ Stage-2
           Map 4
           File Output Operator [FS_11]
             table:{"name:":"default.dest_j1_n14"}
-            Select Operator [SEL_9] (rows=809 width=95)
+            Select Operator [SEL_9] (rows=791 width=95)
               Output:["_col0","_col1"]
               Map Join Operator [MAPJOIN_16]
                 
Conds:TS_14.reducesinkkey0=TS_14.reducesinkkey0(Inner),Output:["_col0","_col2"]
@@ -5496,7 +5496,7 @@ Stage-0
                 Stage-1
                   Reducer 2
                   File Output Operator [FS_10]
-                    Join Operator [JOIN_8] (rows=809 width=356)
+                    Join Operator [JOIN_8] (rows=791 width=356)
                       Output:["_col0","_col1","_col2","_col3"],condition 
map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
                     <-Map 1 [PARTITION-LEVEL SORT]
                       PARTITION-LEVEL SORT [RS_6]

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out 
b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
index 070fca7..c253fd2 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out
@@ -251,7 +251,7 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_5]
-        Group By Operator [GBY_3] (rows=309/309 width=95)
+        Group By Operator [GBY_3] (rows=316/309 width=95)
           
Output:["_col0","_col1"],aggregations:["count(KEY._col0)"],keys:KEY._col0
         <-Map 1 [SIMPLE_EDGE]
           SHUFFLE [RS_2]
@@ -298,7 +298,7 @@ Stage-0
           Output:["_col0"],aggregations:["count()"]
         <-Reducer 2 [CUSTOM_SIMPLE_EDGE]
           PARTITION_ONLY_SHUFFLE [RS_10]
-            Merge Join Operator [MERGEJOIN_18] (rows=267/0 width=8)
+            Merge Join Operator [MERGEJOIN_18] (rows=262/0 width=8)
               Conds:RS_6._col0=RS_7._col0(Inner)
             <-Map 1 [SIMPLE_EDGE]
               SHUFFLE [RS_6]
@@ -347,7 +347,7 @@ Stage-0
           Output:["_col0"],aggregations:["count()"]
         <-Reducer 2 [CUSTOM_SIMPLE_EDGE]
           PARTITION_ONLY_SHUFFLE [RS_10]
-            Merge Join Operator [MERGEJOIN_18] (rows=267/1019 width=8)
+            Merge Join Operator [MERGEJOIN_18] (rows=262/1019 width=8)
               Conds:RS_6._col0=RS_7._col0(Inner)
             <-Map 1 [SIMPLE_EDGE]
               SHUFFLE [RS_6]
@@ -451,9 +451,9 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_10]
-        Select Operator [SEL_9] (rows=809/1028 width=178)
+        Select Operator [SEL_9] (rows=791/1028 width=178)
           Output:["_col0","_col1"]
-          Merge Join Operator [MERGEJOIN_15] (rows=809/1028 width=178)
+          Merge Join Operator [MERGEJOIN_15] (rows=791/1028 width=178)
             Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
           <-Map 1 [SIMPLE_EDGE]
             SHUFFLE [RS_6]
@@ -513,9 +513,9 @@ Stage-0
     Stage-1
       Reducer 2
       File Output Operator [FS_9]
-        Transform Operator [SCR_8] (rows=809/1028 width=178)
+        Transform Operator [SCR_8] (rows=791/1028 width=178)
           command:cat
-          Merge Join Operator [MERGEJOIN_14] (rows=809/1028 width=178)
+          Merge Join Operator [MERGEJOIN_14] (rows=791/1028 width=178)
             Conds:RS_3.key=RS_5.key(Inner),Output:["_col0","_col1"]
           <-Map 1 [SIMPLE_EDGE]
             SHUFFLE [RS_3]

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out 
b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index bccfa04..42bad01 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -847,9 +847,9 @@ Stage-0
     Stage-1
       Map 2 vectorized
       File Output Operator [FS_34]
-        Select Operator [SEL_33] (rows=391/480 width=186)
+        Select Operator [SEL_33] (rows=399/480 width=186)
           Output:["_col0","_col1","_col2"]
-          Map Join Operator [MAPJOIN_32] (rows=391/480 width=186)
+          Map Join Operator [MAPJOIN_32] (rows=399/480 width=186)
             
BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
           <-Map 1 [CUSTOM_EDGE] vectorized
             MULTICAST [RS_29]

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out 
b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
index 5c17512..9d14557 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
@@ -44,11 +44,11 @@ Stage-0
     Stage-1
       Reducer 3
       File Output Operator [FS_12]
-        Select Operator [SEL_11] (rows=2076/10 width=553)
+        Select Operator [SEL_11] (rows=2048/10 width=552)
           
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
         <-Reducer 2 [SIMPLE_EDGE]
           SHUFFLE [RS_10]
-            Merge Join Operator [MERGEJOIN_17] (rows=2076/10 width=553)
+            Merge Join Operator [MERGEJOIN_17] (rows=2048/10 width=552)
               
Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
             <-Map 1 [SIMPLE_EDGE]
               SHUFFLE [RS_6]
@@ -143,7 +143,7 @@ Stage-0
           Output:["_col0"],aggregations:["count()"]
         <-Reducer 2 [CUSTOM_SIMPLE_EDGE]
           PARTITION_ONLY_SHUFFLE [RS_10]
-            Merge Join Operator [MERGEJOIN_18] (rows=2076/10 width=8)
+            Merge Join Operator [MERGEJOIN_18] (rows=2048/10 width=8)
               Conds:RS_6._col0=RS_7._col0(Inner)
             <-Map 1 [SIMPLE_EDGE]
               SHUFFLE [RS_6]
@@ -232,16 +232,16 @@ Stage-0
     Stage-1
       Reducer 4
       File Output Operator [FS_15]
-        Select Operator [SEL_14] (rows=623/5 width=11)
+        Select Operator [SEL_14] (rows=631/5 width=11)
           Output:["_col0","_col1"]
         <-Reducer 3 [SIMPLE_EDGE]
           SHUFFLE [RS_13]
-            Group By Operator [GBY_11] (rows=623/5 width=11)
+            Group By Operator [GBY_11] (rows=631/5 width=11)
               Output:["_col0","_col1"],aggregations:["count()"],keys:KEY._col0
             <-Reducer 2 [SIMPLE_EDGE]
               SHUFFLE [RS_10]
                 PartitionCols:_col0
-                Merge Join Operator [MERGEJOIN_20] (rows=2076/10 width=3)
+                Merge Join Operator [MERGEJOIN_20] (rows=2048/10 width=3)
                   Conds:RS_6._col1=RS_7._col0(Inner),Output:["_col0"]
                 <-Map 1 [SIMPLE_EDGE]
                   SHUFFLE [RS_6]

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out 
b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index fd71c0c..75f29fa 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -114,9 +114,9 @@ Stage-3
                 <-Reducer 4 [CUSTOM_SIMPLE_EDGE]
                   File Output Operator [FS_19]
                     table:{"name:":"default.src_multi2_n7"}
-                    Select Operator [SEL_18] (rows=849/508 width=178)
+                    Select Operator [SEL_18] (rows=830/508 width=178)
                       Output:["_col0","_col1"]
-                      Merge Join Operator [MERGEJOIN_26] (rows=849/508 
width=178)
+                      Merge Join Operator [MERGEJOIN_26] (rows=830/508 
width=178)
                         
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col0","_col3"]
                       <-Map 7 [SIMPLE_EDGE]
                         SHUFFLE [RS_16]
@@ -154,7 +154,7 @@ Stage-3
                                       TableScan [TS_3] (rows=25/25 width=175)
                                         Output:["key","value"]
                   PARTITION_ONLY_SHUFFLE [RS_2]
-                    Select Operator [SEL_1] (rows=849/508 width=178)
+                    Select Operator [SEL_1] (rows=830/508 width=178)
                       Output:["key","value"]
                        Please refer to the previous Select Operator [SEL_18]
 

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out 
b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index 8b7b11d..d993905 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -675,9 +675,9 @@ Stage-0
     Stage-1
       Map 2 vectorized
       File Output Operator [FS_34]
-        Select Operator [SEL_33] (rows=391 width=186)
+        Select Operator [SEL_33] (rows=399 width=186)
           Output:["_col0","_col1","_col2"]
-          Map Join Operator [MAPJOIN_32] (rows=391 width=186)
+          Map Join Operator [MAPJOIN_32] (rows=399 width=186)
             
BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
           <-Map 1 [CUSTOM_EDGE] vectorized
             MULTICAST [RS_29]

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out 
b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
index 5b9149c..910a812 100644
--- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 25057 Data size: 200456 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 24737 Data size: 197896 Basic 
stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -175,7 +175,7 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 25057 Data size: 200456 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 24737 Data size: 197896 Basic 
stats: COMPLETE Column stats: COMPLETE
                         HybridGraceHashJoin: true
                         Group By Operator
                           aggregations: count()
@@ -293,7 +293,7 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 18702 Data size: 149616 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 18464 Data size: 147712 Basic 
stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -408,7 +408,7 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 18702 Data size: 149616 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 18464 Data size: 147712 Basic 
stats: COMPLETE Column stats: COMPLETE
                         HybridGraceHashJoin: true
                         Group By Operator
                           aggregations: count()
@@ -521,7 +521,7 @@ STAGE PLANS:
                         1 _col0 (type: int)
                       input vertices:
                         1 Map 3
-                      Statistics: Num rows: 25057 Data size: 200456 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 24737 Data size: 197896 Basic 
stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -630,7 +630,7 @@ STAGE PLANS:
                         1 _col0 (type: int)
                       input vertices:
                         1 Map 3
-                      Statistics: Num rows: 25057 Data size: 200456 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 24737 Data size: 197896 Basic 
stats: COMPLETE Column stats: COMPLETE
                       HybridGraceHashJoin: true
                       Group By Operator
                         aggregations: count()

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out 
b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
index 3bacb4a..a3a77f9 100644
--- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_2.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 1
                         2 Map 4
-                      Statistics: Num rows: 261 Data size: 2088 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 250 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -200,7 +200,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 1
                         2 Map 4
-                      Statistics: Num rows: 261 Data size: 2088 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 250 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                       HybridGraceHashJoin: true
                       Group By Operator
                         aggregations: count()
@@ -343,7 +343,7 @@ STAGE PLANS:
                         0 Map 1
                         2 Map 4
                         3 Map 5
-                      Statistics: Num rows: 1694 Data size: 13552 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1584 Data size: 12672 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -492,7 +492,7 @@ STAGE PLANS:
                         0 Map 1
                         2 Map 4
                         3 Map 5
-                      Statistics: Num rows: 1694 Data size: 13552 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1584 Data size: 12672 Basic stats: 
COMPLETE Column stats: COMPLETE
                       HybridGraceHashJoin: true
                       Group By Operator
                         aggregations: count()
@@ -671,7 +671,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 1
                         2 Map 6
-                      Statistics: Num rows: 261 Data size: 2088 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 250 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -729,7 +729,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 7
                         2 Map 10
-                      Statistics: Num rows: 261 Data size: 2088 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 265 Data size: 2120 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -915,7 +915,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 1
                         2 Map 6
-                      Statistics: Num rows: 261 Data size: 2088 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 250 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                       HybridGraceHashJoin: true
                       Group By Operator
                         aggregations: count()
@@ -974,7 +974,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 7
                         2 Map 10
-                      Statistics: Num rows: 261 Data size: 2088 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 265 Data size: 2120 Basic stats: 
COMPLETE Column stats: COMPLETE
                       HybridGraceHashJoin: true
                       Group By Operator
                         aggregations: count()
@@ -1157,7 +1157,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 1
                         2 Map 4
-                      Statistics: Num rows: 20 Data size: 1780 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 19 Data size: 1691 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -1169,7 +1169,7 @@ STAGE PLANS:
                         input vertices:
                           1 Map 5
                           2 Map 6
-                        Statistics: Num rows: 204 Data size: 1632 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 196 Data size: 1568 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -1347,7 +1347,7 @@ STAGE PLANS:
                       input vertices:
                         0 Map 1
                         2 Map 4
-                      Statistics: Num rows: 20 Data size: 1780 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 19 Data size: 1691 Basic stats: 
COMPLETE Column stats: COMPLETE
                       HybridGraceHashJoin: true
                       Map Join Operator
                         condition map:
@@ -1360,7 +1360,7 @@ STAGE PLANS:
                         input vertices:
                           1 Map 5
                           2 Map 6
-                        Statistics: Num rows: 204 Data size: 1632 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 196 Data size: 1568 Basic stats: 
COMPLETE Column stats: COMPLETE
                         HybridGraceHashJoin: true
                         Group By Operator
                           aggregations: count()

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/ql/src/test/results/clientpositive/tez/tez-tag.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/tez-tag.q.out 
b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
index 55ce485..cf96067 100644
--- a/ql/src/test/results/clientpositive/tez/tez-tag.q.out
+++ b/ql/src/test/results/clientpositive/tez/tez-tag.q.out
@@ -190,7 +190,7 @@ Stage-0
           PARTITION_ONLY_SHUFFLE [RS_17]
             Group By Operator [GBY_16] (rows=1 width=8)
               Output:["_col0"],aggregations:["count()"]
-              Merge Join Operator [MERGEJOIN_30] (rows=63 width=8)
+              Merge Join Operator [MERGEJOIN_30] (rows=64 width=8)
                 Conds:RS_12._col0=RS_13._col0(Inner)
               <-Map 6 [SIMPLE_EDGE]
                 SHUFFLE [RS_13]

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
index 4e4dfb7..b630fa3 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
 import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
 import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils;
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
 
 public class NumDistinctValueEstimatorFactory {
 
@@ -44,7 +45,7 @@ public class NumDistinctValueEstimatorFactory {
       if (isFMSketch(buf)) {
         return FMSketchUtils.deserializeFM(buf);
       } else {
-        return HyperLogLog.builder().build().deserialize(buf);
+        return HyperLogLogUtils.deserializeHLL(buf);
       }
     } catch (IOException e) {
       throw new RuntimeException(e);
@@ -56,7 +57,7 @@ public class NumDistinctValueEstimatorFactory {
     if (n instanceof FMSketch) {
       return new FMSketch(((FMSketch) n).getNumBitVectors());
     } else {
-      return HyperLogLog.builder().build();
+      return HyperLogLog.builder().setSizeOptimized().build();
     }
   }
 
@@ -65,7 +66,7 @@ public class NumDistinctValueEstimatorFactory {
     if ("fm".equals(func.toLowerCase())) {
       return new FMSketch(numBitVectors);
     } else if ("hll".equals(func.toLowerCase())) {
-      return HyperLogLog.builder().build();
+      return HyperLogLog.builder().setSizeOptimized().build();
     } else {
       throw new RuntimeException("Can not recognize " + func);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
index 12897fc..422bfbe 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java
@@ -62,6 +62,31 @@ public class HLLDenseRegister implements HLLRegister {
     return set(registerIdx, (byte) lr);
   }
 
+  // this is a lossy invert of the function above, which produces a hashcode
+  // which collides with the current winner of the register (we lose all higher
+  // bits, but we get all bits useful for lesser p-bit options)
+
+  // +-------------|-------------+
+  // |xxxx100000000|1000000000000|  (lr=9 + idx=1024)
+  // +-------------|-------------+
+  //                \
+  // +---------------|-----------+
+  // |xxxx10000000010|00000000000|  (lr=2 + idx=0)
+  // +---------------|-----------+
+
+  // This shows the relevant bits of the original hash value
+  // and how the conversion is moving bits from the index value
+  // over to the leading zero computation
+  public void extractLowBitsTo(HLLRegister dest) {
+    for (int idx = 0; idx < register.length; idx++) {
+      byte lr = register[idx]; // this can be a max of 65, never > 127
+      if (lr != 0) {
+        final int shift = p + lr - 1; // an int shift wraps past 31; at 64+ (lr = 65) no bit is left to set
+        dest.add((shift < Long.SIZE ? 1L << shift : 0L) | idx);
+      }
+    }
+  }
+
   public boolean set(int idx, byte value) {
     boolean updated = false;
     if (idx < register.length && value > register[idx]) {

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
index d62b858..d5ac54a 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.common.ndv.hll;
 
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.TreeMap;
 
 public class HLLSparseRegister implements HLLRegister {
@@ -187,6 +188,18 @@ public class HLLSparseRegister implements HLLRegister {
     return sparseMap;
   }
 
+  // this is effectively the same as the dense register impl.
+  public void extractLowBitsTo(HLLRegister dest) {
+    for (Entry<Integer, Byte> entry : getSparseMap().entrySet()) {
+      byte lr = entry.getValue(); // this can be a max of 65, never > 127
+      if (lr != 0) {
+        // should be a no-op for sparse
+        final int shift = p + lr - 1; // an int shift wraps past 31; at 64+ (lr = 65) no bit is left to set
+        dest.add((shift < Long.SIZE ? 1L << shift : 0L) | entry.getKey());
+      }
+    }
+  }
+
   public int getP() {
     return p;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index a3cc989..91a6865 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -18,10 +18,8 @@
 
 package org.apache.hadoop.hive.common.ndv.hll;
 
-import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.Map;
@@ -160,6 +158,13 @@ public class HyperLogLog implements 
NumDistinctValueEstimator {
       return this;
     }
 
+    public HyperLogLogBuilder setSizeOptimized() {
+      // pins p to 10 (~1kb per vector or smaller); keep it fixed rather than configurable,
+      // because merge() throws when the receiving sketch has a larger p than the one merged in
+      this.numRegisterIndexBits = 10;
+      return this;
+    }
+
     public HyperLogLogBuilder setEncoding(EncodingType enc) {
       this.encoding = enc;
       return this;
@@ -431,12 +436,23 @@ public class HyperLogLog implements 
NumDistinctValueEstimator {
    * @throws IllegalArgumentException
    */
   public void merge(HyperLogLog hll) {
-    if (p != hll.p || chosenHashBits != hll.chosenHashBits) {
+    if (chosenHashBits != hll.chosenHashBits) {
       throw new IllegalArgumentException(
           "HyperLogLog cannot be merged as either p or hashbits are different. 
Current: "
               + toString() + " Provided: " + hll.toString());
     }
 
+    if (p > hll.p) {
+      throw new IllegalArgumentException(
+          "HyperLogLog cannot merge a smaller p into a larger one : "
+              + toString() + " Provided: " + hll.toString());
+    }
+
+    if (p != hll.p) {
+      // invariant: p < hll.p
+      hll = hll.squash(p);
+    }
+
     EncodingType otherEncoding = hll.getEncoding();
 
     if (encoding.equals(EncodingType.SPARSE) && 
otherEncoding.equals(EncodingType.SPARSE)) {
@@ -464,7 +480,37 @@ public class HyperLogLog implements 
NumDistinctValueEstimator {
   }
 
   /**
-   * Converts sparse to dense hll register
+   * Reduces the accuracy of the HLL provided to a smaller size
+   * @param p0 
+   *         - new p size for the new HyperLogLog (smaller or no change)
+   * @return reduced (or same) HyperLogLog instance
+   */
+  public HyperLogLog squash(final int p0) {
+    if (p0 > p) {
+      throw new IllegalArgumentException(
+          "HyperLogLog cannot be squashed to be bigger. Current: "
+              + toString() + " Provided: " + p0);
+    }
+
+    if (p0 == p) {
+      return this; // nothing to reduce: the caller gets this same instance, not a copy
+    }
+
+    final HyperLogLog hll = new HyperLogLogBuilder()
+        .setNumRegisterIndexBits(p0).setEncoding(EncodingType.DENSE)
+        .enableNoBias(noBias).build();
+    final HLLDenseRegister result = hll.denseRegister;
+    // replay every non-empty register of this sketch into the smaller dense register set
+    if (encoding == EncodingType.SPARSE) {
+      sparseRegister.extractLowBitsTo(result);
+    } else if (encoding == EncodingType.DENSE) {
+      denseRegister.extractLowBitsTo(result);
+    }
+    return hll;
+  }
+
+  /**
+   * Converts sparse to dense hll register.
    * @param sparseRegister
    *          - sparse register to be converted
    * @return converted dense register
@@ -576,14 +622,7 @@ public class HyperLogLog implements 
NumDistinctValueEstimator {
 
   @Override
   public NumDistinctValueEstimator deserialize(byte[] buf) {
-    InputStream is = new ByteArrayInputStream(buf);
-    try {
-      HyperLogLog result = HyperLogLogUtils.deserializeHLL(is);
-      is.close();
-      return result;
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
+    return HyperLogLogUtils.deserializeHLL(buf);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
index 4e6510b..aeba2e9 100644
--- 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
+++ 
b/standalone-metastore/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.common.ndv.hll;
 
+import java.io.ByteArrayInputStream;
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
@@ -126,7 +127,7 @@ public class HyperLogLogUtils {
   }
 
   /**
-   * Refer serializeHLL() for format of serialization. This funtions
+   * Refer serializeHLL() for format of serialization. This function
    * deserializes the serialized hyperloglogs
    * @param in
    *          - input stream
@@ -198,6 +199,22 @@ public class HyperLogLogUtils {
     return result;
   }
 
+  /**
+   * Deserializes a HyperLogLog from its serialized byte-array form by wrapping
+   * the array in a stream and delegating to {@link #deserializeHLL(InputStream)}.
+   * An IOException raised while reading is rethrown as a RuntimeException.
+   * @param buf - serialized bytes to deserialize
+   * @return HyperLogLog
+   */
+  public static HyperLogLog deserializeHLL(final byte[] buf) {
+    // TODO: use faster non-sync inputstream
+    try (InputStream in = new ByteArrayInputStream(buf)) {
+      return deserializeHLL(in);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   private static void bitpackHLLRegister(OutputStream out, byte[] register, 
int bitWidth)
       throws IOException {
     int bitsLeft = 8;

http://git-wip-us.apache.org/repos/asf/hive/blob/13960aa9/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
 
b/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
index 617d9c3..e014fb5 100644
--- 
a/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
+++ 
b/standalone-metastore/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
@@ -37,14 +37,18 @@ public class TestHyperLogLog {
     HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
     HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
         .setEncoding(EncodingType.DENSE).build();
+    HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+        .setEncoding(EncodingType.DENSE).build();
     int size = 1000;
     for (int i = 0; i < size; i++) {
       hll.addLong(i);
       hll2.addLong(size + i);
       hll3.addLong(2 * size + i);
+      hll4.addLong(3 * size + i);
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
+    double delta4 = threshold * (4*size) / 100;
     assertEquals((double) size, (double) hll.count(), delta);
     assertEquals((double) size, (double) hll2.count(), delta);
 
@@ -63,8 +67,13 @@ public class TestHyperLogLog {
     assertEquals((double) 3 * size, (double) hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
-    // invalid merge -- register set size doesn't match
+    // valid merge -- register set size gets bigger (also 4k items)
     hll.merge(hll4);
+    assertEquals((double) 4 * size, (double) hll.count(), delta4);
+    assertEquals(EncodingType.DENSE, hll.getEncoding());
+
+    // invalid merge -- smaller register merge to bigger
+    hll.merge(hll5);
   }
 
   @Test(expected = IllegalArgumentException.class)
@@ -74,14 +83,18 @@ public class TestHyperLogLog {
     HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
     HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
         .setEncoding(EncodingType.SPARSE).build();
+    HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+        .setEncoding(EncodingType.SPARSE).build();
     int size = 500;
     for (int i = 0; i < size; i++) {
       hll.addLong(i);
       hll2.addLong(size + i);
       hll3.addLong(2 * size + i);
+      hll4.addLong(3 * size + i);
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
+    double delta4 = threshold * (4*size) / 100;
     assertEquals((double) size, (double) hll.count(), delta);
     assertEquals((double) size, (double) hll2.count(), delta);
 
@@ -100,8 +113,13 @@ public class TestHyperLogLog {
     assertEquals((double) 3 * size, (double) hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
-    // invalid merge -- register set size doesn't match
+    // valid merge -- register set size gets bigger & dense automatically
     hll.merge(hll4);
+    assertEquals((double) 4 * size, (double) hll.count(), delta4);
+    assertEquals(EncodingType.DENSE, hll.getEncoding());
+
+    // invalid merge -- smaller register merge to bigger
+    hll.merge(hll5);
   }
 
   @Test(expected = IllegalArgumentException.class)
@@ -111,11 +129,14 @@ public class TestHyperLogLog {
     HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
     HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
         .setEncoding(EncodingType.DENSE).build();
+    HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+        .setEncoding(EncodingType.DENSE).build();
     int size = 1000;
     for (int i = 0; i < size; i++) {
       hll.addLong(i);
       hll2.addLong(size + i);
       hll3.addLong(2 * size + i);
+      hll4.addLong(3 * size + i);
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
@@ -137,8 +158,13 @@ public class TestHyperLogLog {
     assertEquals((double) 3 * size, (double) hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
+    // merge should convert hll2 to DENSE
+    hll2.merge(hll4);
+    assertEquals((double) 2 * size, (double) hll2.count(), delta);
+    assertEquals(EncodingType.DENSE, hll2.getEncoding());
+
+    // invalid merge -- smaller register merge to bigger
+    hll.merge(hll5);
   }
 
   @Test(expected = IllegalArgumentException.class)
@@ -148,11 +174,14 @@ public class TestHyperLogLog {
     HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
     HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
         .setEncoding(EncodingType.SPARSE).build();
+    HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+        .setEncoding(EncodingType.SPARSE).build();
     int size = 1000;
     for (int i = 0; i < size; i++) {
       hll.addLong(i);
       hll2.addLong(size + i);
       hll3.addLong(2 * size + i);
+      hll4.addLong(3 * size + i);
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
@@ -174,8 +203,14 @@ public class TestHyperLogLog {
     assertEquals((double) 3 * size, (double) hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
+    // merge should convert hll3 to DENSE
+    hll3.merge(hll4);
+    assertEquals((double) 2 * size, (double) hll3.count(), delta);
+    assertEquals(EncodingType.DENSE, hll3.getEncoding());
+
+    // invalid merge -- smaller register merge to bigger
+    hll.merge(hll5);
+
   }
 
   @Test(expected = IllegalArgumentException.class)
@@ -185,11 +220,14 @@ public class TestHyperLogLog {
     HyperLogLog hll3 = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
     HyperLogLog hll4 = HyperLogLog.builder().setNumRegisterIndexBits(16)
         .setEncoding(EncodingType.SPARSE).build();
+    HyperLogLog hll5 = HyperLogLog.builder().setNumRegisterIndexBits(12)
+        .setEncoding(EncodingType.SPARSE).build();
     int size = 1000;
     for (int i = 0; i < size; i++) {
       hll.addLong(i);
       hll2.addLong(size + i);
       hll3.addLong(2 * size + i);
+      hll4.addLong(3 * size + i);
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
@@ -211,8 +249,13 @@ public class TestHyperLogLog {
     assertEquals((double) 3 * size, (double) hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
-    // invalid merge -- register set size doesn't match
-    hll.merge(hll4);
+    // merge should convert hll2 to DENSE
+    hll2.merge(hll4);
+    assertEquals((double) 2 * size, (double) hll2.count(), delta);
+    assertEquals(EncodingType.DENSE, hll2.getEncoding());
+
+    // invalid merge -- smaller register merge to bigger
+    hll.merge(hll5);
   }
 
   @Test
@@ -227,4 +270,69 @@ public class TestHyperLogLog {
     double delta = threshold * size / 100;
     assertEquals((double) size, (double) hll.count(), delta);
   }
+
+  @Test
+  public void testHLLSquash() {
+
+    int[] sizes = new int[] { 500, 1000, 2300, 4096};
+    int minBits = 9; // smallest p under test; slots 0..8 of hlls are left null and never indexed
+    for (final int size : sizes) {
+
+      HyperLogLog hlls[] = new HyperLogLog[16]; // slot k holds the sketch built with p = k
+      for (int k = minBits; k < hlls.length; k++) {
+        final HyperLogLog hll = HyperLogLog.builder()
+            
.setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(k).build();
+        for (int i = 0; i < size; i++) {
+          hll.addLong(i);
+        }
+        hlls[k] = hll;
+      }
+
+      for (int k = minBits; k < hlls.length; k++) {
+        for (int j = k + 1; j < hlls.length; j++) { // every (small p = k, large p = j) pair with j > k
+          final HyperLogLog large = hlls[j];
+          final HyperLogLog small = hlls[k];
+          final HyperLogLog mush = large
+              .squash(small.getNumRegisterIndexBits());
+          assertEquals(small.count(), mush.count(), 0); // squashed estimate must equal the natively built small sketch's
+          double delta = Math.ceil(small.getStandardError()*size); // one standard error of the small sketch, rounded up
+          assertEquals((double) size, (double) mush.count(), delta);
+        }
+      }
+    }
+  }
+
+  @Test
+  public void testHLLDenseDenseSquash() {
+    HyperLogLog p14HLL = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(14).build();
+    HyperLogLog p10HLL = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(10).build();
+    int size = 1_000_000;
+    for (int i = 0; i < size; i++) {
+      p14HLL.addLong(i);
+    }
+
+    for (int i = 0; i < 10_000; i++) { // p10HLL's contents are never asserted; only its p is read below
+      p10HLL.addLong(i);
+    }
+
+    p14HLL.squash(p10HLL.getNumRegisterIndexBits()); // NOTE(review): the returned squashed sketch is discarded; the assert below reads the original p14HLL - confirm intent
+    assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 
100.0);
+  }
+
+  @Test
+  public void testHLLSparseDenseSquash() {
+    HyperLogLog p14HLL = 
HyperLogLog.builder().setEncoding(EncodingType.SPARSE).setNumRegisterIndexBits(14).build();
+    HyperLogLog p10HLL = 
HyperLogLog.builder().setEncoding(EncodingType.DENSE).setNumRegisterIndexBits(10).build();
+    int size = 2000;
+    for (int i = 0; i < size; i++) {
+      p14HLL.addLong(i);
+    }
+
+    for (int i = 0; i < 10_000; i++) { // p10HLL's contents are never asserted; only its p is read below
+      p10HLL.addLong(i);
+    }
+
+    p14HLL.squash(p10HLL.getNumRegisterIndexBits()); // NOTE(review): the returned squashed sketch is discarded; the assert below reads the original p14HLL - confirm intent
+    assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 
100.0);
+  }
 }

[02/14] hive git commit: HIVE-18079 : Statistics: Allow HyperLogLog to be merged to the lowest-common-denominator bit-size (Gopal V via Prasanth J)

Reply via email to