Hello devs

We have a MapReduce job that reads compressed data from S3, converts it
into HFiles, and bulk-loads the data into HBase tables. I am currently
facing the error below while running the MapReduce job, which is submitted
programmatically. I am also attaching mapred-site.xml,
yarn-site.xml, core-site.xml and hdfs-site.xml.

mapreduce.framework.name: yarn
INFO  [2015-07-23 23:53:20,222] org.apache.hadoop.yarn.client.RMProxy:
Connecting to ResourceManager at /172.30.0.147:8032
WARN  [2015-07-23 23:53:20,383] org.apache.hadoop.mapreduce.JobSubmitter:
Hadoop command-line option parsing not performed. Implement the Tool
interface and execute your application with ToolRunner to remedy this.
INFO  [2015-07-23 23:53:20,492]
org.apache.hadoop.mapreduce.lib.input.FileInputFormat: Total input paths to
process : 16
INFO  [2015-07-23 23:53:20,561] org.apache.hadoop.mapreduce.JobSubmitter:
number of splits:16
INFO  [2015-07-23 23:53:20,719] org.apache.hadoop.mapreduce.JobSubmitter:
Submitting tokens for job: job_1437695344326_0002
INFO  [2015-07-23 23:53:20,842]
org.apache.hadoop.yarn.client.api.impl.YarnClientImpl: Submitted
application application_1437695344326_0002
INFO  [2015-07-23 23:53:20,867] org.apache.hadoop.mapreduce.Job: The url to
track the job:
http://ip-172-30-0-147.us-west-2.compute.internal:8088/proxy/application_1437695344326_0002/
INFO  [2015-07-23 23:53:20,868] org.apache.hadoop.mapreduce.Job: Running
job: job_1437695344326_0002
INFO  [2015-07-23 23:53:35,994] org.apache.hadoop.mapreduce.Job: Job
job_1437695344326_0002 running in uber mode : false
INFO  [2015-07-23 23:53:35,995] org.apache.hadoop.mapreduce.Job:  map 0%
reduce 0%
INFO  [2015-07-23 23:53:43,053] org.apache.hadoop.mapreduce.Job: Task Id :
attempt_1437695344326_0002_m_000001_1000, Status : FAILED
File
file:/tmp/hadoop-yarn/staging/root/.staging/job_1437695344326_0002/job.jar
does not exist
java.io.FileNotFoundException: File
file:/tmp/hadoop-yarn/staging/root/.staging/job_1437695344326_0002/job.jar
does not exist
at
org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:608)
at
org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:821)
at
org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:598)
at
org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:414)
at org.apache.hadoop.yarn.util.FSDownload.copy(FSDownload.java:251)
at org.apache.hadoop.yarn.util.FSDownload.access$000(FSDownload.java:61)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:359)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:357)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:356)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:60)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)


INFO  [2015-07-23 23:53:44,075] org.apache.hadoop.mapreduce.Job: Task Id :
attempt_1437695344326_0002_m_000002_1000, Status : FAILED
File
file:/tmp/hadoop-yarn/staging/root/.staging/job_1437695344326_0002/job.jar
does not exist
java.io.FileNotFoundException: File
file:/tmp/hadoop-yarn/staging/root/.staging/job_1437695344326_0002/job.jar
does not exist
at
org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:608)
at
org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:821)
at
org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:598)
at
org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:414)
at org.apache.hadoop.yarn.util.FSDownload.copy(FSDownload.java:251)
at org.apache.hadoop.yarn.util.FSDownload.access$000(FSDownload.java:61)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:359)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:357)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:356)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:60)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <!-- fs.default.name is deprecated since Hadoop 2.x; fs.defaultFS is the canonical key -->
    <name>fs.defaultFS</name>
    <value>hdfs://172.30.0.147:8020</value>
  </property>
  <property>
    <name>topology.script.file.name</name>
    <value>/etc/hadoop/rack-topology.sh</value>
  </property>
  <property>
    <name>io.compression.codecs</name> 
    <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <!-- NOTE(review): "simple" is a value of hadoop.security.authentication,
         not of hadoop.security.authorization (which is boolean true/false) —
         key corrected to match the intended value -->
    <name>hadoop.security.authentication</name>
    <value>simple</value>
  </property>
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>dfs.name.dir</name>
    <value>/data/blitz-hadoop/dfs.name.dir</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/data/blitz-hadoop/dfs.data.dir</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/usr/hdp/2.2.6.0-2800/hadoop/dfs_hosts_exclude</value>
  </property>
  <!-- NOTE(review): dfs.datanode.max.xcievers is the deprecated alias of
       dfs.datanode.max.transfer.threads, yet the two entries below carry
       conflicting values (8192 vs 4096); only one will take effect — confirm
       which limit is intended and keep a single key/value. -->
  <property>
    <name>dfs.datanode.max.xcievers</name>
    <value>8192</value>
  </property>
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>4096</value>
  </property>
  <property>
    <name>dfs.block.local-path-access.user</name>
    <value>root</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>750</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>64</value>
  </property>
  <property>
    <name>dfs.datanode.handler.count</name>
    <value>8</value>
  </property>
  <property>
    <!-- NOTE(review): dfs.block.size is deprecated (canonical key is
         dfs.blocksize); also the value 26843648 (~25.6 MB) is not a power of
         two — possibly a typo for 268435456 (256 MB)? confirm intent -->
    <name>dfs.block.size</name>
    <value>26843648</value>
  </property>
  <property>
    <name>dfs.namenode.avoid.read.stale.datanode</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.avoid.write.stale.datanode</name>
    <value>true</value>
  </property>
    <property>
    <name>dfs.namenode.stale.datanode.interval</name>
    <value>30000</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/lib/hadoop-hdfs/dn_socket</value>
  </property>
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>172.30.0.147:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>172.30.0.147:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>172.30.0.147:8030</value>
  </property>
  <property>  
    <name>mapreduce.framework.name</name>  
    <value>yarn</value>  
  </property>
  <property>
    <name>mapred.system.dir</name>
    <value>/hadoop/mapred/system</value>
  </property>
  <property>
    <name>mapred.local.dir</name>
    <value>/data/mapred_local</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.staging.root.dir</name>
    <value>/user/mapred/staging</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/hadoop_tmp</value>
  </property> 
  <property>
    <name>mapred.tasktracker.map.tasks.maximum</name>
    <value>8</value>
  </property>
  <property>
    <name>mapred.tasktracker.reduce.tasks.maximum</name>
    <value>8</value>
  </property>
  <property>
    <name>mapred.hosts.exclude</name>
    <value>/usr/hdp/2.2.6.0-2800/hadoop/mapred_hosts_exclude</value>
  </property>
  <property>
    <name>mapred.queue.names</name>
    <value>default</value>
  </property>
  <property>
    <name>mapred.acls.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx4g</value>
  </property>
  <property>
    <name>mapred.reduce.slowstart.completed.maps</name>
    <value>0.85</value>
  </property>
  <property>
    <name>mapred.compress.map.output</name>
    <value>true</value>
  </property>
  <property>
    <name>mapred.map.output.compression.codec</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <name>mapreduce.admin.map.child.java.opts</name>
    <value>-server -XX:NewRatio=8 -Djava.library.path=/usr/hdp/2.2.6.0-2800/hadoop/lib/native -Djava.net.preferIPv4Stack=true</value>
    <final>true</final>
  </property> 
  <property>
    <name>mapreduce.admin.reduce.child.java.opts</name>
    <value>-server -XX:NewRatio=8 -Djava.library.path=/usr/hdp/2.2.6.0-2800/hadoop/lib/native -Djava.net.preferIPv4Stack=true</value>
    <final>true</final>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>4096</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx3072m</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx6144m</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/data/staging</value>
  </property>
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>172.30.0.147:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>172.30.0.147:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>172.30.0.147:8030</value>
  </property>
  <property>
    <name>mapred.compress.map.output</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>172.30.0.147:10020</value>
  </property>
  <property>
      <name>yarn.nodemanager.resource.memory-mb</name>
      <value>10240</value>
  </property>
  <property>
      <name>yarn.scheduler.minimum-allocation-mb</name>
      <value>2048</value>
  </property>
  <property>
    <name>mapred.map.output.compression.codec</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>fs.s3n.impl</name>
    <value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
  </property>
  <property>
    <name>mapreduce.application.classpath</name>
    <value>/usr/hdp/2.2.6.0-2800/hadoop-mapreduce/*,/usr/hdp/2.2.6.0-2800/hbase/lib/*,/opt/appdynamics/blitz/lib/*,/usr/hdp/2.2.6.0-2800/hadoop-yarn/*</value>
  </property>
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/hdp/2.2.6.0-2800/hadoop-mapreduce/*,/usr/hdp/2.2.6.0-2800/hbase/lib/*,/opt/appdynamics/blitz/lib/*,/usr/hdp/2.2.6.0-2800/hadoop-yarn/*</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
  </property>
</configuration>

Reply via email to