[hive] branch master updated: HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)

prasanthj Fri, 08 Mar 2019 18:54:18 -0800

This is an automated email from the ASF dual-hosted git repository.

prasanthj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new e7f7fe3  HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)
e7f7fe3 is described below

commit e7f7fe3b1cf443823a05e5409f55c55475fb5b48
Author: Prasanth Jayachandran <prasan...@apache.org>
AuthorDate: Fri Mar 8 18:52:15 2019 -0800

    HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  4 ++--
 .../results/clientpositive/groupby_position.q.out  | 24 +++++++++++-----------
 .../perf/tez/constraints/query94.q.out             |  4 ++--
 .../perf/tez/constraints/query95.q.out             |  4 ++--
 .../results/clientpositive/perf/tez/query94.q.out  |  4 ++--
 .../results/clientpositive/perf/tez/query95.q.out  |  4 ++--
 6 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 0dea099..076035b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1741,14 +1741,14 @@ public class HiveConf extends Configuration {
         "How many rows with the same key value should be cached in memory per smb joined table."),
     HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000,
         "Number of rows after which size of the grouping keys/aggregation classes is performed"),
-    HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.99,
+    HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5,
         "Portion of total memory to be used by map-side group aggregation hash table"),
     HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3,
         "Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"),
     HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9,
         "The max memory to be used by map-side group aggregation hash table.\n" +
         "If the memory usage is higher than this number, force to flush data"),
-    HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5,
+    HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.99,
         "Hash aggregation will be turned off if the ratio between hash  table size and input rows is bigger than this number. \n" +
         "Set to 1 to make sure hash aggregation is never turned off."),
     HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true,
diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out
index 296279a..7305df0 100644
--- a/ql/src/test/results/clientpositive/groupby_position.q.out
+++ b/ql/src/test/results/clientpositive/groupby_position.q.out
@@ -69,7 +69,7 @@ STAGE PLANS:
                 keys: key (type: string), value (type: string), substr(value, 
5) (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 83 Data size: 30710 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 166 Data size: 61420 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   table:
@@ -168,7 +168,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
-              Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE 
Column stats: COMPLETE
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
@@ -176,14 +176,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: UDFToInteger(_col0) (type: int), _col1 (type: 
string), CAST( _col2 AS STRING) (type: string)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -192,7 +192,7 @@ STAGE PLANS:
             Select Operator
               expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string)
               outputColumnNames: key, val1, val2
-              Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE 
Column stats: COMPLETE
               Group By Operator
                 aggregations: compute_stats(key, 'hll'), compute_stats(val1, 
'hll'), compute_stats(val2, 'hll')
                 mode: hash
@@ -354,7 +354,7 @@ STAGE PLANS:
                 keys: value (type: string), key (type: string), substr(value, 
5) (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 83 Data size: 30710 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 166 Data size: 61420 Basic stats: 
COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   table:
@@ -453,7 +453,7 @@ STAGE PLANS:
               key expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string)
-              Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE 
Column stats: COMPLETE
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
@@ -461,14 +461,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string), KEY._col1 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE 
Column stats: COMPLETE
+          Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE 
Column stats: COMPLETE
           Select Operator
             expressions: UDFToInteger(_col1) (type: int), _col0 (type: 
string), CAST( _col2 AS STRING) (type: string)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE 
Column stats: COMPLETE
+            Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE 
Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE 
Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -477,7 +477,7 @@ STAGE PLANS:
             Select Operator
               expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string)
               outputColumnNames: key, val1, val2
-              Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE 
Column stats: COMPLETE
+              Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE 
Column stats: COMPLETE
               Group By Operator
                 aggregations: compute_stats(key, 'hll'), compute_stats(val1, 
'hll'), compute_stats(val2, 'hll')
                 mode: hash
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out
index 1dba4fb..ab688b2 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out
@@ -103,12 +103,12 @@ Stage-0
                   PARTITION_ONLY_SHUFFLE [RS_160]
                     Group By Operator [GBY_159] (rows=1 width=232)
                       
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
-                      Group By Operator [GBY_158] (rows=2511437 width=228)
+                      Group By Operator [GBY_158] (rows=5022875 width=228)
                         
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
                       <-Reducer 6 [SIMPLE_EDGE]
                         SHUFFLE [RS_73]
                           PartitionCols:_col0
-                          Group By Operator [GBY_72] (rows=2511437 width=228)
+                          Group By Operator [GBY_72] (rows=5022875 width=228)
                             
Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
                             Select Operator [SEL_41] (rows=5022875 width=229)
                               Output:["_col4","_col5","_col6"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out
index 523fa2c..420cd78 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out
@@ -117,12 +117,12 @@ Stage-0
                   PARTITION_ONLY_SHUFFLE [RS_278]
                     Group By Operator [GBY_277] (rows=1 width=232)
                       
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
-                      Group By Operator [GBY_276] (rows=2511437 width=228)
+                      Group By Operator [GBY_276] (rows=5022875 width=228)
                         
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
                       <-Reducer 6 [SIMPLE_EDGE]
                         SHUFFLE [RS_109]
                           PartitionCols:_col0
-                          Group By Operator [GBY_108] (rows=2511437 width=228)
+                          Group By Operator [GBY_108] (rows=5022875 width=228)
                             
Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3
                             Merge Join Operator [MERGEJOIN_235] (rows=5022875 
width=227)
                               
Conds:RS_55._col3=RS_275._col0(Inner),Output:["_col3","_col4","_col5"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out
index e6ac653..fdd2fd0 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out
@@ -103,12 +103,12 @@ Stage-0
                   PARTITION_ONLY_SHUFFLE [RS_162]
                     Group By Operator [GBY_161] (rows=1 width=232)
                       
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
-                      Group By Operator [GBY_160] (rows=2511437 width=228)
+                      Group By Operator [GBY_160] (rows=5022875 width=228)
                         
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
                       <-Reducer 6 [SIMPLE_EDGE]
                         SHUFFLE [RS_74]
                           PartitionCols:_col0
-                          Group By Operator [GBY_73] (rows=2511437 width=228)
+                          Group By Operator [GBY_73] (rows=5022875 width=228)
                             
Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
                             Select Operator [SEL_42] (rows=5022875 width=229)
                               Output:["_col4","_col5","_col6"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out
index da131d6..0a8c9a9 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query95.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query95.q.out
@@ -117,12 +117,12 @@ Stage-0
                   PARTITION_ONLY_SHUFFLE [RS_286]
                     Group By Operator [GBY_285] (rows=1 width=232)
                       
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
-                      Group By Operator [GBY_284] (rows=2511437 width=228)
+                      Group By Operator [GBY_284] (rows=5022875 width=228)
                         
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
                       <-Reducer 6 [SIMPLE_EDGE]
                         SHUFFLE [RS_115]
                           PartitionCols:_col0
-                          Group By Operator [GBY_114] (rows=2511437 width=228)
+                          Group By Operator [GBY_114] (rows=5022875 width=228)
                             
Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3
                             Merge Join Operator [MERGEJOIN_241] (rows=5022875 
width=227)
                               
Conds:RS_61._col3=RS_283._col0(Inner),Output:["_col3","_col4","_col5"]

[hive] branch master updated: HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V)

Reply via email to