Repository: hive Updated Branches: refs/heads/master ca51b69f5 -> 41c38ddc0
HIVE-14916. Reduce the memory requirements for Spark tests. (Dapeng Sun, reviewed by Ferdinand Xu, Siddharth Seth) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/41c38ddc Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/41c38ddc Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/41c38ddc Branch: refs/heads/master Commit: 41c38ddc056aaae78fb5bb33632d0d02c0e288fb Parents: ca51b69 Author: Siddharth Seth <ss...@apache.org> Authored: Thu Oct 13 23:36:02 2016 -0700 Committer: Siddharth Seth <ss...@apache.org> Committed: Thu Oct 13 23:36:02 2016 -0700 ---------------------------------------------------------------------- data/conf/spark/yarn-client/hive-site.xml | 4 ++-- .../spark/constprog_semijoin.q.out | 20 ++++++++++---------- .../clientpositive/spark/index_bitmap3.q.out | 4 ++-- .../spark/index_bitmap_auto.q.out | 4 ++-- .../spark/infer_bucket_sort_map_operators.q.out | 8 ++++---- .../infer_bucket_sort_reducers_power_two.q.out | 2 +- .../apache/hadoop/hive/shims/Hadoop23Shims.java | 8 +++----- 7 files changed, 24 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/41c38ddc/data/conf/spark/yarn-client/hive-site.xml ---------------------------------------------------------------------- diff --git a/data/conf/spark/yarn-client/hive-site.xml b/data/conf/spark/yarn-client/hive-site.xml index 8c66f87..4e63245 100644 --- a/data/conf/spark/yarn-client/hive-site.xml +++ b/data/conf/spark/yarn-client/hive-site.xml @@ -236,7 +236,7 @@ <property> <name>spark.executor.memory</name> - <value>1g</value> + <value>512m</value> </property> <property> @@ -246,7 +246,7 @@ <property> <name>spark.driver.memory</name> - <value>1g</value> + <value>512m</value> </property> <property> http://git-wip-us.apache.org/repos/asf/hive/blob/41c38ddc/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out index c6a9b14..0042255 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out @@ -65,7 +65,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -161,8 +161,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -293,8 +293,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -425,7 +425,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -514,7 +514,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -605,7 +605,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -694,7 +694,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -785,7 +785,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 http://git-wip-us.apache.org/repos/asf/hive/blob/41c38ddc/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out b/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out index b77966d..b16989c 100644 --- a/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out +++ b/ql/src/test/results/clientpositive/spark/index_bitmap3.q.out @@ -110,8 +110,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Reducer 2 (GROUP, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 http://git-wip-us.apache.org/repos/asf/hive/blob/41c38ddc/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out b/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out index 54f8d6c..f9e8e3d 100644 --- a/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out +++ b/ql/src/test/results/clientpositive/spark/index_bitmap_auto.q.out @@ -129,8 +129,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) - Reducer 3 <- Reducer 2 (GROUP, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 http://git-wip-us.apache.org/repos/asf/hive/blob/41c38ddc/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out index a343d93..d690e00 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out @@ -190,7 +190,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -315,7 +315,7 @@ Table: test_table_out #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 + numFiles 2 numRows 0 rawDataSize 0 totalSize 0 @@ -465,7 +465,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 4) + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -567,7 +567,7 @@ Table: test_table_out #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 + numFiles 2 numRows 309 rawDataSize 2728 totalSize 3037 http://git-wip-us.apache.org/repos/asf/hive/blob/41c38ddc/ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out index 34f4cc1..8e50d23 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out @@ -318,7 +318,7 @@ Table: test_table #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 4 + numFiles 3 numRows 5 rawDataSize 19 totalSize 24 http://git-wip-us.apache.org/repos/asf/hive/blob/41c38ddc/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java ---------------------------------------------------------------------- diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 341b1e5..ff760c8 100644 --- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -17,10 +17,8 @@ */ package org.apache.hadoop.hive.shims; -import java.io.DataInputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.net.InetSocketAddress; @@ -38,9 +36,7 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; - import javax.security.auth.Subject; - import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; @@ -70,7 +66,6 @@ import org.apache.hadoop.hdfs.protocol.EncryptionZone; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; @@ -460,6 +455,9 @@ public class Hadoop23Shims extends HadoopShimsSecure { conf.set("yarn.resourcemanager.scheduler.class", "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler"); // disable resource monitoring, although it should be off by default conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING, false); + conf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 2048); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 2048); configureImpersonation(conf); mr.init(conf); mr.start();