[
https://issues.apache.org/jira/browse/YARN-5219?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Sunil G updated YARN-5219:
--------------------------
Attachment: YARN-5219.001.patch
Hi
Attaching an initial patch for review. I have tried to validate those shell
variables whose values contain a *$* character, because that means a
substitution is happening for that shell variable.
{{verify_shell_variable}} does the validation.
Attaching {{launch_container.sh}}
{noformat}
#!/bin/bash
#######################################
# Check that a substituted shell variable resolved to a non-empty value.
# Arguments: $1 - name of the variable being checked (used in messages only)
#            $2 - the expanded value of that variable
# Outputs:   on success, "Variable <name> to be defined as <value>" on stdout;
#            on failure, the shell reports a message naming the variable on
#            stderr
# Returns:   0 on success. When $2 is unset or empty, the ${2:?...} expansion
#            fails and a non-interactive shell exits with a non-zero status,
#            so the rest of the script does not run.
#######################################
verify_shell_variable() {
  local name=${1} value
  # Name the variable in the failure message; a bare ${2:?} would only report
  # "2: parameter null or not set", which does not say which variable failed.
  value=${2:?Variable ${name} is null or not set}
  printf 'Variable %s to be defined as %s\n' "${name}" "${value}"
}
# Environment for the container process. Every assignment stays on the same
# line as its `export`: a bare `export` followed by an assignment on the next
# line only lists the current exports and leaves the variable unexported.
# Values passed to verify_shell_variable are quoted so they arrive as a single
# argument with no word splitting or glob expansion.
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT/etc/hadoop"}
export JAVA_HOME=${JAVA_HOME:-"/usr/lib/jvm/java-8-oracle"}
export APP_SUBMIT_TIME_ENV="1465983732983"
export NM_HOST="localhost"
# NOTE(review): HADOOP_COMMON_HOME only receives its default further down, so
# here it expands to whatever the inherited environment holds (possibly empty).
export LD_LIBRARY_PATH="$PWD:$HADOOP_COMMON_HOME/lib/native"
verify_shell_variable LD_LIBRARY_PATH "${LD_LIBRARY_PATH}"
export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"}
export LOGNAME="root"
export JVM_PID="$$"
verify_shell_variable JVM_PID "${JVM_PID}"
export HADOOP_MAPRED_HOME="/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"
export PWD="/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/container_1465983701333_0001_01_000001"
export HADOOP_COMMON_HOME=${HADOOP_COMMON_HOME:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"}
export LOCAL_DIRS="/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001"
export APPLICATION_WEB_PROXY_BASE="/proxy/application_1465983701333_0001"
export SHELL="/bin/bash"
export NM_HTTP_PORT="25008"
export LOG_DIRS="/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001"
# The quoted value below spans two lines; it is reproduced exactly as given.
export NM_AUX_SERVICE_mapreduce_shuffle="AAA0+gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=^M
"
export NM_PORT="25006"
export USER="root"
export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-"/opt/hadoop/trunk/hadoop-3.0.0-alpha1-SNAPSHOT"}
export CLASSPATH="$PWD:$HADOOP_CONF_DIR:$HADOOP_COMMON_HOME/share/hadoop/common/*:$HADOOP_COMMON_HOME/share/hadoop/common/lib/*:$HADOOP_HDFS_HOME/share/hadoop/hdfs/*:$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*:$HADOOP_YARN_HOME/share/hadoop/yarn/*:$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*:job.jar/job.jar:job.jar/classes/:job.jar/lib/*:$PWD/*"
verify_shell_variable CLASSPATH "${CLASSPATH}"
export HADOOP_TOKEN_FILE_LOCATION="/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/container_1465983701333_0001_01_000001/container_tokens"
export YARN_RESOURCEMANAGER_APPLICATION_QUEUE="default"
export HOME="/home/"
export CONTAINER_ID="container_1465983701333_0001_01_000001"
export MALLOC_ARENA_MAX="4"
# Link the localized resources into the current directory. After each step the
# exit status is captured and, if non-zero, the script exits with that status.
# Each ln keeps its source and link name on the same line; on a line of its
# own, `ln -sf` would run without operands and fail.
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/11/job.jar" "job.jar"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
mkdir -p jobSubmitDir
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/12/job.split" "jobSubmitDir/job.split"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/13/job.xml" "job.xml"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
# mkdir -p is repeated before the second jobSubmitDir link; it succeeds when
# the directory already exists.
mkdir -p jobSubmitDir
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
ln -sf "/Users/sunil/local/project_home/builds/tmp/nm-local-dir/usercache/root/appcache/application_1465983701333_0001/filecache/10/job.splitmetainfo" "jobSubmitDir/job.splitmetainfo"
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
# Creating copy of launch script
# cp and chmod keep their operands on the same line; split across lines they
# would run with missing operands.
cp "launch_container.sh" "/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/launch_container.sh"
chmod 640 "/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/launch_container.sh"
# Determining directory contents
# Each redirection stays on the line of the command it belongs to. The first
# write truncates directory.info (1>); the following ones append (1>>).
echo "ls -l:" 1>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
ls -l 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
echo "find -L . -maxdepth 5 -ls:" 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
find -L . -maxdepth 5 -ls 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
echo "broken symlinks(find -L . -maxdepth 5 -type l -ls):" 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
find -L . -maxdepth 5 -type l -ls 1>>"/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/directory.info"
# Replace this shell with a bash instance that starts the MRAppMaster JVM, with
# stdout and stderr redirected into the container log directory.
# The command string is kept on one line: a newline inside the `bash -c`
# argument would end the java command early and run the remaining options as
# separate commands.
exec /bin/bash -c "$JAVA_HOME/bin/java -Djava.io.tmpdir=$PWD/tmp -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Dhadoop.root.logfile=syslog -Xmx1024m org.apache.hadoop.mapreduce.v2.app.MRAppMaster 1>/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/stdout 2>/Users/sunil/local/project_home/builds/tmp/userlogs/application_1465983701333_0001/container_1465983701333_0001_01_000001/stderr "
# Only reached if exec did not replace the current process.
hadoop_shell_errorcode=$?
if [ $hadoop_shell_errorcode -ne 0 ]
then
  exit $hadoop_shell_errorcode
fi
{noformat}
> When an export var command fails in launch_container.sh, the full container
> launch should fail
> ----------------------------------------------------------------------------------------------
>
> Key: YARN-5219
> URL: https://issues.apache.org/jira/browse/YARN-5219
> Project: Hadoop YARN
> Issue Type: Bug
> Reporter: Hitesh Shah
> Assignee: Sunil G
> Attachments: YARN-5219.001.patch
>
>
> Today, a container fails if certain files fail to localize. However, if
> certain env vars fail to get set up properly either due to bugs in the yarn
> application or misconfiguration, the actual process launch still gets
> triggered. This results in either confusing error messages if the process
> fails to launch or worse yet the process launches but then starts behaving
> wrongly if the env var is used to control some behavioral aspects.
> In this scenario, the issue was reproduced by trying to do export
> abc="$\{foo.bar}" which is invalid as var names cannot contain "." in bash.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]