This is an automated email from the ASF dual-hosted git repository.

prasanthj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new e7f7fe3 HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V) e7f7fe3 is described below commit e7f7fe3b1cf443823a05e5409f55c55475fb5b48 Author: Prasanth Jayachandran <prasan...@apache.org> AuthorDate: Fri Mar 8 18:52:15 2019 -0800 HIVE-20656: Sensible defaults: Map aggregation memory configs are too aggressive (Prasanth Jayachandran reviewed by Gopal V) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++-- .../results/clientpositive/groupby_position.q.out | 24 +++++++++++----------- .../perf/tez/constraints/query94.q.out | 4 ++-- .../perf/tez/constraints/query95.q.out | 4 ++-- .../results/clientpositive/perf/tez/query94.q.out | 4 ++-- .../results/clientpositive/perf/tez/query95.q.out | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0dea099..076035b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1741,14 +1741,14 @@ public class HiveConf extends Configuration { "How many rows with the same key value should be cached in memory per smb joined table."), HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000, "Number of rows after which size of the grouping keys/aggregation classes is performed"), - HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.99, + HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5, "Portion of total memory to be used by map-side group aggregation hash table"), HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3, "Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"), 
HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9, "The max memory to be used by map-side group aggregation hash table.\n" + "If the memory usage is higher than this number, force to flush data"), - HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5, + HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.99, "Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" + "Set to 1 to make sure hash aggregation is never turned off."), HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index 296279a..7305df0 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -69,7 +69,7 @@ STAGE PLANS: keys: key (type: string), value (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -168,7 +168,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -176,14 +176,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -192,7 +192,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, val1, val2 - Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') mode: hash @@ -354,7 +354,7 @@ STAGE PLANS: keys: value (type: string), key (type: string), substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -453,7 +453,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 30710 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data 
size: 61420 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -461,14 +461,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), CAST( _col2 AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -477,7 +477,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, val1, val2 - Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') mode: hash diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out index 1dba4fb..ab688b2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out @@ -103,12 +103,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE 
[RS_160] Group By Operator [GBY_159] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_158] (rows=2511437 width=228) + Group By Operator [GBY_158] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_72] (rows=2511437 width=228) + Group By Operator [GBY_72] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 Select Operator [SEL_41] (rows=5022875 width=229) Output:["_col4","_col5","_col6"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out index 523fa2c..420cd78 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out @@ -117,12 +117,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_278] Group By Operator [GBY_277] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_276] (rows=2511437 width=228) + Group By Operator [GBY_276] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=2511437 width=228) + Group By Operator [GBY_108] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 Merge Join Operator [MERGEJOIN_235] (rows=5022875 width=227) Conds:RS_55._col3=RS_275._col0(Inner),Output:["_col3","_col4","_col5"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out index e6ac653..fdd2fd0 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out @@ -103,12 +103,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_162] Group By Operator [GBY_161] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_160] (rows=2511437 width=228) + Group By Operator [GBY_160] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_73] (rows=2511437 width=228) + Group By Operator [GBY_73] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 Select Operator [SEL_42] (rows=5022875 width=229) Output:["_col4","_col5","_col6"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out index da131d6..0a8c9a9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query95.q.out @@ -117,12 +117,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_286] Group By Operator [GBY_285] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_284] (rows=2511437 width=228) + Group By Operator [GBY_284] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_114] (rows=2511437 width=228) + Group By Operator [GBY_114] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 Merge Join Operator [MERGEJOIN_241] (rows=5022875 width=227) Conds:RS_61._col3=RS_283._col0(Inner),Output:["_col3","_col4","_col5"]