[ https://issues.apache.org/jira/browse/TEZ-1238?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14052164#comment-14052164 ]
Jeff Zhang commented on TEZ-1238: --------------------------------- Attached the patch. After the patch, the message on the client side is as follows: *Missing jar in LocalResource of Vertex* {code} DAG diagnostics:[Vertex failed, vertexName=tokenizer, vertexId=vertex_1404448568107_0001_1_00, diagnostics=[Task failed, taskId=task_1404448568107_0001_1_00_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Failure while running task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44) at org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.ClassNotFoundException: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at 
java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:425) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:358) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:270) at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42) ... 14 more ], TaskAttempt 1 failed, info=[Error: Failure while running task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44) at org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.ClassNotFoundException: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at 
java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:425) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:358) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:270) at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42) ... 14 more ], TaskAttempt 2 failed, info=[Error: Failure while running task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44) at org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: 
java.lang.ClassNotFoundException: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:425) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:358) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:270) at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42) ... 14 more ], TaskAttempt 3 failed, info=[Error: Failure while running task:org.apache.tez.dag.api.TezUncheckedException: Unable to load class: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44) at org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:534) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.initialize(LogicalIOProcessorRuntimeTask.java:170) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:177) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.ClassNotFoundException: com.zjffdu.tutorial.tez.WordCount$TokenProcessor at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:425) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:358) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:270) at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:42) ... 14 more ]], Vertex failed as one or more tasks failed. failedTasks:1], Vertex killed, vertexName=summer, vertexId=vertex_1404448568107_0001_1_01, diagnostics=[Vertex received Kill while in RUNNING state., Vertex killed as other vertex failed. failedTasks:0], DAG failed due to vertex failure. 
failedVertices:1 killedVertices:1] {code} *Exception happen in Processor* {code} DAG diagnostics:[Vertex re-running, vertexName=tokenizer, vertexId=vertex_1404445909469_0005_1_00, Vertex failed, vertexName=summer, vertexId=vertex_1404445909469_0005_1_01, diagnostics=[Task failed, taskId=task_1404445909469_0005_1_01_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Failure while running task:java.lang.ClassCastException: org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader at com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94) at org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) ], TaskAttempt 1 failed, info=[Error: Failure while running task:java.lang.ClassCastException: org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader at 
com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94) at org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) ], TaskAttempt 2 failed, info=[Error: Failure while running task:java.lang.ClassCastException: org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader at com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94) at org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) ], TaskAttempt 3 failed, info=[Error: Failure while running task:java.lang.ClassCastException: org.apache.tez.runtime.library.input.ShuffledMergedInput$ShuffledMergedKeyValuesReader cannot be cast to org.apache.tez.runtime.library.api.KeyValueReader at com.zjffdu.tutorial.tez.WordCount$SumProcessor.run(WordCount.java:94) at org.apache.tez.runtime.library.processor.SimpleProcessor.run(SimpleProcessor.java:37) at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:309) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:180) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:172) at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.call(TezTaskRunner.java:167) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) ]], Vertex failed as one or more tasks failed. 
failedTasks:1], Vertex killed, vertexName=tokenizer, vertexId=vertex_1404445909469_0005_1_00, diagnostics=[Vertex received Kill while in RUNNING state., Vertex killed as other vertex failed. failedTasks:0], DAG failed due to vertex failure. failedVertices:1 killedVertices:1] 14/07/04 12:30:27 INFO client.TezSession: Shutting down Tez Session, sessionName=tez-session, applicationId=application_1404445909469_0005 {code} > Display more clear diagnostics info on client side if missing jar in > LocalResource or Exception happen in Processor > ------------------------------------------------------------------------------------------------------------------- > > Key: TEZ-1238 > URL: https://issues.apache.org/jira/browse/TEZ-1238 > Project: Apache Tez > Issue Type: Sub-task > Affects Versions: 0.4.0 > Reporter: Jeff Zhang > Assignee: Jeff Zhang > Attachments: Tez-1238.patch > > > I have a Tez job which failed because I didn't put my jar in the local > resources. But on the client side, the exception is not clear enough for me to > figure out what's wrong with it. The real reason is that it couldn't load the > Processor class. I have to run the command "yarn logs" to find the real exception > in the container logs. > I also have another case that has an exception in my Processor; the message > on the client side is still not clear to me. I think that we should pass the > real exception to the diagnostics and display it on the client side; this should > help users find out what's wrong with their program. > *Exception on client side* > {code} > 14/06/26 14:57:15 INFO rpc.DAGClientRPCImpl: VertexStatus: VertexName: > summer Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 0 Killed: > 114/06/26 14:57:15 INFO rpc.DAGClientRPCImpl: VertexStatus: VertexName: > tokenizer Progress: 0% TotalTasks: 1 Succeeded: 0 Running: 0 Failed: 1 > Killed: 014/06/26 14:57:15 INFO rpc.DAGClientRPCImpl: DAG completed. 
> FinalState=FAILEDDAG diagnostics:[Vertex failed, vertexName=tokenizer, > vertexId=vertex_1403765612557_0004_1_00, diagnostics=[Task failed, > taskId=task_1403765612557_0004_1_00_000000, diagnostics=[TaskAttempt 0 > failed, info=[Container container_1403765612557_0004_01_000002 COMPLETED > with diagnostics set to [Exception from container-launch: > org.apache.hadoop.util.Shell$ExitCodeException: > org.apache.hadoop.util.Shell$ExitCodeException: at > org.apache.hadoop.util.Shell.runCommand(Shell.java:505) > at org.apache.hadoop.util.Shell.run(Shell.java:418) > at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650) > at > org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer( > DefaultContainerExecutor.java:195) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call( > ContainerLaunch.java:300) > at > org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call( > ContainerLaunch.java:81) > at java.util.concurrent.FutureTask.run(FutureTask.java:262) > at java.util.concurrent.ThreadPoolExecutor.runWorker( > ThreadPoolExecutor.java:1145) > at java.util.concurrent.ThreadPoolExecutor$Worker.run( > ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Container exited with a non-zero exit code 1 > {code} > *The real exception in container log:* > {code} > 2014-06-26 14:57:02,146 ERROR [main] > org.apache.hadoop.yarn.YarnUncaughtExceptionHandler: Thread > Thread[main,5,main] threw an Exception. 
> org.apache.tez.dag.api.TezUncheckedException: Unable to load class: > com.zjffdu.tutorial.tez.WordCount$TokenProcessor > at org.apache.tez.common.RuntimeUtils.getClazz(RuntimeUtils.java:44) > at > org.apache.tez.common.RuntimeUtils.createClazzInstance(RuntimeUtils.java:66) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.createProcessor(LogicalIOProcessorRuntimeTask.java:533) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.<init>(LogicalIOProcessorRuntimeTask.java:146) > at > org.apache.tez.runtime.task.TezTaskRunner.<init>(TezTaskRunner.java:78) > at org.apache.tez.runtime.task.TezChild.run(TezChild.java:208) > at org.apache.tez.runtime.task.TezChild.main(TezChild.java:363) > {code} -- This message was sent by Atlassian JIRA (v6.2#6252)