AMBARI-18936. DataNode JVM heap settings should include CMSInitiatingOccupancy (Arpit Agarwal via smohanty)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/0c837a60 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/0c837a60 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/0c837a60 Branch: refs/heads/branch-feature-AMBARI-18456 Commit: 0c837a60c6bbe7c6fb7c5586963520957bb4146f Parents: 05fe423 Author: Sumit Mohanty <[email protected]> Authored: Fri Dec 2 13:13:35 2016 -0800 Committer: Sumit Mohanty <[email protected]> Committed: Fri Dec 2 13:13:35 2016 -0800 ---------------------------------------------------------------------- .../services/HDFS/configuration/hadoop-env.xml | 176 +++++++++++++++++++ 1 file changed, 176 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/0c837a60/ambari-server/src/main/resources/stacks/HDP/2.4/services/HDFS/configuration/hadoop-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.4/services/HDFS/configuration/hadoop-env.xml b/ambari-server/src/main/resources/stacks/HDP/2.4/services/HDFS/configuration/hadoop-env.xml new file mode 100644 index 0000000..24e0193 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.4/services/HDFS/configuration/hadoop-env.xml @@ -0,0 +1,176 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--> +<configuration supports_adding_forbidden="true"> + <!-- hadoop-env.sh --> + <property> + <name>content</name> + <display-name>hadoop-env template</display-name> + <description>This is the jinja template for hadoop-env.sh file</description> + <value> +# Set Hadoop-specific environment variables here. + +# The only required environment variable is JAVA_HOME. All others are +# optional. When running a distributed configuration it is best to +# set JAVA_HOME in this file, so that it is correctly defined on +# remote nodes. + +# The java implementation to use. Required. +export JAVA_HOME={{java_home}} +export HADOOP_HOME_WARN_SUPPRESS=1 + +# Hadoop home directory +export HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}} + +# Hadoop Configuration Directory + +{# this is different for HDP1 #} +# Path to jsvc required by secure HDP 2.0 datanode +export JSVC_HOME={{jsvc_path}} + + +# The maximum amount of heap to use, in MB. Default is 1000. +export HADOOP_HEAPSIZE="{{hadoop_heapsize}}" + +export HADOOP_NAMENODE_INIT_HEAPSIZE="-Xms{{namenode_heapsize}}" + +# Extra Java runtime options. Empty by default. +export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}" + +# Command specific options appended to HADOOP_OPTS when specified +HADOOP_JOBTRACKER_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{jtnode_opt_newsize}} -XX:MaxNewSize={{jtnode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xmx{{jtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dhadoop.mapreduce.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}" + +HADOOP_TASKTRACKER_OPTS="-server -Xmx{{ttnode_heapsize}} -Dhadoop.security.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}" + +{% if java_version < 8 %} +SHARED_HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -XX:PermSize={{namenode_opt_permsize}} -XX:MaxPermSize={{namenode_opt_maxpermsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT" +export HADOOP_NAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-namenode/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HADOOP_NAMENODE_OPTS}" +export HADOOP_DATANODE_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -XX:PermSize=128m -XX:MaxPermSize=256m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{dtnode_heapsize}} -Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_DATANODE_OPTS} -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly" + +export HADOOP_SECONDARYNAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-secondarynamenode/bin/kill-secondary-name-node\" ${HADOOP_SECONDARYNAMENODE_OPTS}" + +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m -XX:MaxPermSize=512m $HADOOP_CLIENT_OPTS" + +{% else %} +SHARED_HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT" +export HADOOP_NAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-namenode/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HADOOP_NAMENODE_OPTS}" +export HADOOP_DATANODE_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{dtnode_heapsize}} -Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_DATANODE_OPTS} -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly" + +export HADOOP_SECONDARYNAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-secondarynamenode/bin/kill-secondary-name-node\" ${HADOOP_SECONDARYNAMENODE_OPTS}" + +# The following applies to multiple commands (fs, dfs, fsck, distcp etc) +export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m $HADOOP_CLIENT_OPTS" +{% endif %} + +HADOOP_NFS3_OPTS="-Xmx{{nfsgateway_heapsize}}m -Dhadoop.security.logger=ERROR,DRFAS ${HADOOP_NFS3_OPTS}" +HADOOP_BALANCER_OPTS="-server -Xmx{{hadoop_heapsize}}m ${HADOOP_BALANCER_OPTS}" + + +# On secure datanodes, user to run the datanode as after dropping privileges +export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER:-{{hadoop_secure_dn_user}}} + +# Extra ssh options. Empty by default. +export HADOOP_SSH_OPTS="-o ConnectTimeout=5 -o SendEnv=HADOOP_CONF_DIR" + +# Where log files are stored. $HADOOP_HOME/logs by default. +export HADOOP_LOG_DIR={{hdfs_log_dir_prefix}}/$USER + +# History server logs +export HADOOP_MAPRED_LOG_DIR={{mapred_log_dir_prefix}}/$USER + +# Where log files are stored in the secure data environment. +export HADOOP_SECURE_DN_LOG_DIR={{hdfs_log_dir_prefix}}/$HADOOP_SECURE_DN_USER + +# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. +# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves + +# host:path where hadoop code should be rsync'd from. Unset by default. +# export HADOOP_MASTER=master:/home/$USER/src/hadoop + +# Seconds to sleep between slave commands. Unset by default. This +# can be useful in large clusters, where, e.g., slave rsyncs can +# otherwise arrive faster than the master can service them. +# export HADOOP_SLAVE_SLEEP=0.1 + +# The directory where pid files are stored. /tmp by default. +export HADOOP_PID_DIR={{hadoop_pid_dir_prefix}}/$USER +export HADOOP_SECURE_DN_PID_DIR={{hadoop_pid_dir_prefix}}/$HADOOP_SECURE_DN_USER + +# History server pid +export HADOOP_MAPRED_PID_DIR={{mapred_pid_dir_prefix}}/$USER + +YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY" + +# A string representing this instance of hadoop. $USER by default. +export HADOOP_IDENT_STRING=$USER + +# The scheduling priority for daemon processes. See 'man nice'. + +# export HADOOP_NICENESS=10 + +# Add database libraries +JAVA_JDBC_LIBS="" +if [ -d "/usr/share/java" ]; then + for jarFile in `ls /usr/share/java | grep -E "(mysql|ojdbc|postgresql|sqljdbc)" 2>/dev/null` + do + JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile + done +fi + +# Add libraries to the hadoop classpath - some may not need a colon as they already include it +export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}${JAVA_JDBC_LIBS} + +# Setting path to hdfs command line +export HADOOP_LIBEXEC_DIR={{hadoop_libexec_dir}} + +# Mostly required for hadoop 2.0 +export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH} + +export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS" + + +# Fix temporary bug, when ulimit from conf files is not picked up, without full relogin. +# Makes sense to fix only when runing DN as root +if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then + {% if is_datanode_max_locked_memory_set %} + ulimit -l {{datanode_max_locked_memory}} + {% endif %} + ulimit -n {{hdfs_user_nofile_limit}} +fi + </value> + <value-attributes> + <type>content</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>nfsgateway_heapsize</name> + <display-name>NFSGateway maximum Java heap size</display-name> + <value>1024</value> + <description>Maximum Java heap size for NFSGateway (Java option -Xmx)</description> + <value-attributes> + <type>int</type> + <unit>MB</unit> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> +</configuration>
