Jonathan Eagles created MAPREDUCE-7232: ------------------------------------------
Summary: Reduce excessive getFileLinkInfo calls in MR Key: MAPREDUCE-7232 URL: https://issues.apache.org/jira/browse/MAPREDUCE-7232 Project: Hadoop Map/Reduce Issue Type: Improvement Reporter: Jonathan Eagles Assignee: Jonathan Eagles The MR Client redundantly calls getFileLinkInfo for the same file sometimes over 10 times. The files under question originate from mapreduce.job.cache.files and mapreduce.job.classpath.files and their archive equivalences. Client Observation Case 1 & 2 {code} 19/08/15 10:57:55 INFO hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar java.lang.Exception at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498) at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772) at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341) at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301) at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:262) at org.apache.hadoop.mapred.YARNRunner.createApplicationSubmissionContext(YARNRunner.java:467) at org.apache.hadoop.mapred.YARNRunner.submitJob(YARNRunner.java:296) at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244) at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1341) at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1338) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953) at org.apache.hadoop.mapreduce.Job.submit(Job.java:1338) at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1359) at org.apache.hadoop.mapreduce.SleepJob.run(SleepJob.java:273) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) at org.apache.hadoop.mapreduce.SleepJob.main(SleepJob.java:194) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71) at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144) at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:136) at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:144) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:239) at org.apache.hadoop.util.RunJar.main(RunJar.java:153) {code} {code} 19/08/15 10:57:55 INFO hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar java.lang.Exception at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498) at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772) at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341) at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301) at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:263) at org.apache.hadoop.mapred.YARNRunner.createApplicationSubmissionContext(YARNRunner.java:467) at org.apache.hadoop.mapred.YARNRunner.submitJob(YARNRunner.java:296) at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244) at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1341) at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1338) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953) at org.apache.hadoop.mapreduce.Job.submit(Job.java:1338) at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1359) at org.apache.hadoop.mapreduce.SleepJob.run(SleepJob.java:273) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) at org.apache.hadoop.mapreduce.SleepJob.main(SleepJob.java:194) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71) at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144) at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:136) at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:144) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:239) at org.apache.hadoop.util.RunJar.main(RunJar.java:153) {code} AM Observation Case 1 & 2 {code} 2019-08-15 10:58:00,987 INFO [AsyncDispatcher event handler] org.apache.hadoop.hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar java.lang.Exception at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498) at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772) at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341) at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301) at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:262) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.getInitialClasspath(TaskAttemptImpl.java:744) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createCommonContainerLaunchContext(TaskAttemptImpl.java:883) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createContainerLaunchContext(TaskAttemptImpl.java:944) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1711) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1688) at org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362) at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:1207) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:147) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1450) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1442) at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:184) at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:110) at java.lang.Thread.run(Thread.java:748) {code} {code} 2019-08-15 10:58:00,990 INFO [AsyncDispatcher event handler] org.apache.hadoop.hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar java.lang.Exception at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498) at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772) at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341) at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301) at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:263) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.getInitialClasspath(TaskAttemptImpl.java:744) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createCommonContainerLaunchContext(TaskAttemptImpl.java:883) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createContainerLaunchContext(TaskAttemptImpl.java:944) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1711) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1688) at org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362) at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:1207) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:147) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1450) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1442) at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:184) at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:110) at java.lang.Thread.run(Thread.java:748) {code} Client mapreduce.job.cache.files {code} 19/08/15 15:14:11 INFO hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar java.lang.Exception at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498) at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772) at org.apache.hadoop.mapreduce.v2.util.MRApps.parseDistributedCacheArtifacts(MRApps.java:601) at org.apache.hadoop.mapreduce.v2.util.MRApps.setupDistributedCache(MRApps.java:491) at org.apache.hadoop.mapred.YARNRunner.createApplicationSubmissionContext(YARNRunner.java:487) at org.apache.hadoop.mapred.YARNRunner.submitJob(YARNRunner.java:296) at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244) at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1341) at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1338) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953) at org.apache.hadoop.mapreduce.Job.submit(Job.java:1338) at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1359) at org.apache.hadoop.mapreduce.SleepJob.run(SleepJob.java:273) at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) at org.apache.hadoop.mapreduce.SleepJob.main(SleepJob.java:194) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71) at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144) at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:136) at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:144) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:239) at org.apache.hadoop.util.RunJar.main(RunJar.java:153) {code} AM mapreduce.job.cache.files {code} 2019-08-15 15:14:15,872 INFO [AsyncDispatcher event handler] org.apache.hadoop.hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar java.lang.Exception at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486) at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498) at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772) at org.apache.hadoop.mapreduce.v2.util.MRApps.parseDistributedCacheArtifacts(MRApps.java:601) at org.apache.hadoop.mapreduce.v2.util.MRApps.setupDistributedCache(MRApps.java:491) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createCommonContainerLaunchContext(TaskAttemptImpl.java:818) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createContainerLaunchContext(TaskAttemptImpl.java:944) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1711) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1688) at org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362) at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:1207) at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:147) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1450) at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1442) at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:184) at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:110) at java.lang.Thread.run(Thread.java:748) {code} -- This message was sent by Atlassian JIRA (v7.6.14#76016) --------------------------------------------------------------------- To unsubscribe, e-mail: mapreduce-dev-unsubscr...@hadoop.apache.org For additional commands, e-mail: mapreduce-dev-h...@hadoop.apache.org