[ https://issues.apache.org/jira/browse/MYRIAD-255?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Liu You updated MYRIAD-255: --------------------------- Description: I can't run a YARN application, and the job gets stuck every time I execute the command 'hadoop jar xxx.....'. Please help me. Thank you very much. !stucked job.png|thumbnail! Myriad Web UI: !myriad ui-1.png|thumbnail! !myriad ui-2.png|thumbnail! Mesos Web UI: !Mesos Web UI.png|thumbnail! yarn-mesos-resourcemanager-s18.log: {code:java} 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 8032: starting 2017-07-20 17:36:46,955 INFO org.apache.myriad.scheduler.event.handlers.ExecutorLostEventHandler: Executor value: "myriad_executor5b12e719-6990-4546-9370-b984fc35b759-00005b12e719-6990-4546-9370-b984fc35b759-O08e5e7115-060b-42c6-b24f-64642048cfa1-S0" of slave value: "8e5e7115-060b-42c6-b24f-64642048cfa1-S0" lost with exit status: 9 2017-07-20 17:36:46,969 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Status Update for task: nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898 | state: TASK_FAILED 2017-07-20 17:36:46,970 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioned to active state 2017-07-20 17:36:46,971 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Marked as pending failed task with id value: "nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898" {code} myriad-config-default.yml: {code:java} mesosMaster: 192.168.130.18:5050 checkpoint: false frameworkFailoverTimeout: 43200000 frameworkName: MyriadAlpha frameworkRole: "yarn" frameworkUser: mesos # User the Node Manager runs as, required if nodeManagerURI set, otherwise defaults to the user # running the resource manager. frameworkSuperUser: root # To be deprecated, currently permissions need set by a superuser due to Mesos-1790. Must be # root or have passwordless sudo. Required if nodeManagerURI set, ignored otherwise. 
#nativeLibrary: /usr/local/lib/libmesos.so nativeLibrary: /home/mesos/mesos/mesos-1.2.0/mesos_install/lib/libmesos.so zkServers: 192.168.130.18:2181 zkTimeout: 20000 restApiPort: 8192 #servedConfigPath: dist/config.tgz servedConfigPath: /home/mesos/mesos/hdfs/hadoop-2.7.3/etc/config.tgz #servedBinaryPath: dist/hadoop-2.6.0.tgz servedBinaryPath: /home/mesos/mesos/hdfs/binary.tgz profiles: zero: # NMs launched with this profile dynamically obtain cpu/mem from Mesos cpu: 0 mem: 0 small: cpu: 2 mem: 2048 medium: cpu: 4 mem: 4096 large: cpu: 10 mem: 12288 nmInstances: # NMs to start with. Requires at least 1 NM with a non-zero profile. medium: 1 # <profile_name : instances> rebalancer: false haEnabled: false nodemanager: jvmMaxMemoryMB: 1024 cpus: 0.2 cgroups: false executor: jvmMaxMemoryMB: 256 path: hdfs://s18:8020/dist/myriad-executor-0.2.0.jar #path: file:///home/mesos/mesos/hdfs/hadoop-2.7.3/share/hadoop/yarn/lib/myriad-executor-0.2.0.jar #The following should be used for a remotely distributed URI, hdfs assumed but other URI types valid. 
#nodeManagerUri: hdfs://s18:8020/user/MR/binary.tgz #configUri: http://s18:8192/api/artifacts/config.tgz #jvmUri: https://downloads.mycompany.com/java/jre-7u76-linux-x64.tar.gz yarnEnvironment: YARN_HOME: /home/mesos/mesos/hdfs/hadoop-2.7.3 #HADOOP_CONF_DIR=config #HADOOP_TMP_DIR=$MESOS_SANDBOX #YARN_HOME: hadoop-2.7.0 #this should be relative if nodeManagerUri is set #JAVA_HOME: /usr/lib/jvm/java-default #System dependent, but sometimes necessary #JAVA_HOME: /home/mesos/jdk/jdk1.8.0_131 #JAVA_HOME: jre1.7.0_76 # Path to JRE distribution, relative to sandbox directory #JAVA_LIBRARY_PATH: /opt/mycompany/lib #mesosAuthenticationPrincipal: #mesosAuthenticationSecretFilename: {code} yarn-site.xml: {code:java} <configuration> <!-- Site specific YARN configuration properties --> <property> <name>yarn.resourcemanager.hostname</name> <value>s18</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle,myriad_executor</value> <!-- If using MapR distro, please use the following value: <value>mapreduce_shuffle,mapr_direct_shuffle,myriad_executor</value> --> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.nodemanager.aux-services.myriad_executor.class</name> <value>org.apache.myriad.executor.MyriadExecutorAuxService</value> </property> <property> <name>yarn.nm.liveness-monitor.expiry-interval-ms</name> <value>2000</value> </property> <property> <name>yarn.am.liveness-monitor.expiry-interval-ms</name> <value>10000</value> </property> <property> <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name> <value>1000</value> </property> <!-- (more) Site-specific YARN configuration properties --> <property> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>${nodemanager.resource.cpu-vcores}</value> 
</property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>${nodemanager.resource.memory-mb}</value> </property> <!-- Disable pmem/vmem checks needed for FGS and hadoop 2.7.2 --> <property> <name>yarn.nodemanager.pmem-check-enabled</name> <value>false</value> </property> <property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property> <!-- Dynamic Port Assignment enablement by Mesos --> <property> <name>yarn.nodemanager.address</name> <value>${myriad.yarn.nodemanager.address}</value> </property> <property> <name>yarn.nodemanager.webapp.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.webapp.https.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.localizer.address</name> <value>${myriad.yarn.nodemanager.localizer.address}</value> </property> <!-- Myriad Scheduler configuration --> <property> <name>yarn.resourcemanager.scheduler.class</name> <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value> </property> <!-- Needed for Fine Grain Scaling --> <property> <name>yarn.scheduler.minimum-allocation-vcores</name> <value>0</value> </property> <property> <name>yarn.scheduler.minimum-allocation-mb</name> <value>0</value> </property> </configuration> {code} was: I can't run a YARN application, and the job gets stuck every time I execute the command 'hadoop jar xxx.....'. Please help me. Thank you very much. !stucked job.png|thumbnail! Myriad Web UI: !myriad ui-1.png|thumbnail! !myriad ui-2.png|thumbnail! Mesos Web UI: !Mesos Web UI.png|thumbnail! 
yarn-mesos-resourcemanager-s18.log: {code:java} 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 8032: starting 2017-07-20 17:36:46,955 INFO org.apache.myriad.scheduler.event.handlers.ExecutorLostEventHandler: Executor value: "myriad_executor5b12e719-6990-4546-9370-b984fc35b759-00005b12e719-6990-4546-9370-b984fc35b759-O08e5e7115-060b-42c6-b24f-64642048cfa1-S0" of slave value: "8e5e7115-060b-42c6-b24f-64642048cfa1-S0" lost with exit status: 9 2017-07-20 17:36:46,969 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Status Update for task: nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898 | state: TASK_FAILED 2017-07-20 17:36:46,970 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioned to active state 2017-07-20 17:36:46,971 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Marked as pending failed task with id value: "nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898" {code} myriad-config-default.yml: {code:java} mesosMaster: 192.168.130.18:5050 checkpoint: false frameworkFailoverTimeout: 43200000 frameworkName: MyriadAlpha frameworkRole: "yarn" frameworkUser: mesos # User the Node Manager runs as, required if nodeManagerURI set, otherwise defaults to the user # running the resource manager. frameworkSuperUser: root # To be deprecated, currently permissions need set by a superuser due to Mesos-1790. Must be # root or have passwordless sudo. Required if nodeManagerURI set, ignored otherwise. 
#nativeLibrary: /usr/local/lib/libmesos.so nativeLibrary: /home/mesos/mesos/mesos-1.2.0/mesos_install/lib/libmesos.so zkServers: 192.168.130.18:2181 zkTimeout: 20000 restApiPort: 8192 #servedConfigPath: dist/config.tgz servedConfigPath: /home/mesos/mesos/hdfs/hadoop-2.7.3/etc/config.tgz #servedBinaryPath: dist/hadoop-2.6.0.tgz servedBinaryPath: /home/mesos/mesos/hdfs/binary.tgz profiles: zero: # NMs launched with this profile dynamically obtain cpu/mem from Mesos cpu: 0 mem: 0 small: cpu: 2 mem: 2048 medium: cpu: 4 mem: 4096 large: cpu: 10 mem: 12288 nmInstances: # NMs to start with. Requires at least 1 NM with a non-zero profile. medium: 1 # <profile_name : instances> rebalancer: false haEnabled: false nodemanager: jvmMaxMemoryMB: 1024 cpus: 0.2 cgroups: false executor: jvmMaxMemoryMB: 256 path: hdfs://s18:8020/dist/myriad-executor-0.2.0.jar #path: file:///home/mesos/mesos/hdfs/hadoop-2.7.3/share/hadoop/yarn/lib/myriad-executor-0.2.0.jar #The following should be used for a remotely distributed URI, hdfs assumed but other URI types valid. 
#nodeManagerUri: hdfs://s18:8020/user/MR/binary.tgz #configUri: http://s18:8192/api/artifacts/config.tgz #jvmUri: https://downloads.mycompany.com/java/jre-7u76-linux-x64.tar.gz yarnEnvironment: YARN_HOME: /home/mesos/mesos/hdfs/hadoop-2.7.3 #HADOOP_CONF_DIR=config #HADOOP_TMP_DIR=$MESOS_SANDBOX #YARN_HOME: hadoop-2.7.0 #this should be relative if nodeManagerUri is set #JAVA_HOME: /usr/lib/jvm/java-default #System dependent, but sometimes necessary #JAVA_HOME: /home/mesos/jdk/jdk1.8.0_131 #JAVA_HOME: jre1.7.0_76 # Path to JRE distribution, relative to sandbox directory #JAVA_LIBRARY_PATH: /opt/mycompany/lib #mesosAuthenticationPrincipal: #mesosAuthenticationSecretFilename: {code} yarn-site.xml: {code:java} <configuration> <!-- Site specific YARN configuration properties --> <property> <name>yarn.resourcemanager.hostname</name> <value>s18</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle,myriad_executor</value> <!-- If using MapR distro, please use the following value: <value>mapreduce_shuffle,mapr_direct_shuffle,myriad_executor</value> --> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.nodemanager.aux-services.myriad_executor.class</name> <value>org.apache.myriad.executor.MyriadExecutorAuxService</value> </property> <property> <name>yarn.nm.liveness-monitor.expiry-interval-ms</name> <value>2000</value> </property> <property> <name>yarn.am.liveness-monitor.expiry-interval-ms</name> <value>10000</value> </property> <property> <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name> <value>1000</value> </property> <!-- (more) Site-specific YARN configuration properties --> <property> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>${nodemanager.resource.cpu-vcores}</value> 
</property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>${nodemanager.resource.memory-mb}</value> </property> <!-- Disable pmem/vmem checks needed for FGS and hadoop 2.7.2 --> <property> <name>yarn.nodemanager.pmem-check-enabled</name> <value>false</value> </property> <property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property> <!-- Dynamic Port Assignment enablement by Mesos --> <property> <name>yarn.nodemanager.address</name> <value>${myriad.yarn.nodemanager.address}</value> </property> <property> <name>yarn.nodemanager.webapp.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.webapp.https.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.localizer.address</name> <value>${myriad.yarn.nodemanager.localizer.address}</value> </property> <!-- Myriad Scheduler configuration --> <property> <name>yarn.resourcemanager.scheduler.class</name> <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value> </property> <!-- Needed for Fine Grain Scaling --> <property> <name>yarn.scheduler.minimum-allocation-vcores</name> <value>0</value> </property> <property> <name>yarn.scheduler.minimum-allocation-mb</name> <value>0</value> </property> </configuration> {code} > unable to run yarn application with hadoop2.7.3 and mesos1.2.0 > -------------------------------------------------------------- > > Key: MYRIAD-255 > URL: https://issues.apache.org/jira/browse/MYRIAD-255 > Project: Myriad > Issue Type: Bug > Components: Executor, Scheduler > Affects Versions: Myriad 0.2.0 > Environment: Ubuntu 14.04.3 LTS > master hostname: s18 > slaves hostname: s19, s20, s21 > hadoop 2.7.3 > mesos 1.2.0 > Reporter: Liu You > Attachments: mapred-site.xml, > mesos-master.s18.mesos.log.INFO.20170720-173553.21812, > mesos-master.s18.mesos.log.WARNING.20170720-173553.21812, Mesos Web UI.png, > 
myriad-config-default.yml, myriad ui-1.png, myriad ui-2.png, stucked job.png, > yarn-mesos-resourcemanager-s18.log, yarn-mesos-resourcemanager-s18.out, > yarn-site.xml > > > I can't run a YARN application, and the job gets stuck every time I execute > the command 'hadoop jar xxx.....'. Please help me. Thank you very much. > !stucked job.png|thumbnail! > Myriad Web UI: > !myriad ui-1.png|thumbnail! > !myriad ui-2.png|thumbnail! > Mesos Web UI: > !Mesos Web UI.png|thumbnail! > yarn-mesos-resourcemanager-s18.log: > {code:java} > 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server > Responder: starting > 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server > listener on 8032: starting > 2017-07-20 17:36:46,955 INFO > org.apache.myriad.scheduler.event.handlers.ExecutorLostEventHandler: Executor > value: > "myriad_executor5b12e719-6990-4546-9370-b984fc35b759-00005b12e719-6990-4546-9370-b984fc35b759-O08e5e7115-060b-42c6-b24f-64642048cfa1-S0" > of slave value: "8e5e7115-060b-42c6-b24f-64642048cfa1-S0" > lost with exit status: 9 > 2017-07-20 17:36:46,969 INFO > org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Status > Update for task: nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898 | state: > TASK_FAILED > 2017-07-20 17:36:46,970 INFO > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioned > to active state > 2017-07-20 17:36:46,971 INFO > org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Marked > as pending failed task with id value: > "nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898" > {code} > myriad-config-default.yml: > {code:java} > mesosMaster: 192.168.130.18:5050 > checkpoint: false > frameworkFailoverTimeout: 43200000 > frameworkName: MyriadAlpha > frameworkRole: "yarn" > frameworkUser: mesos # User the Node Manager runs as, required if > nodeManagerURI set, otherwise defaults to the user > # running the resource manager. 
> frameworkSuperUser: root # To be deprecated, currently permissions need set > by a superuser due to Mesos-1790. Must be > # root or have passwordless sudo. Required if > nodeManagerURI set, ignored otherwise. > #nativeLibrary: /usr/local/lib/libmesos.so > nativeLibrary: /home/mesos/mesos/mesos-1.2.0/mesos_install/lib/libmesos.so > zkServers: 192.168.130.18:2181 > zkTimeout: 20000 > restApiPort: 8192 > #servedConfigPath: dist/config.tgz > servedConfigPath: /home/mesos/mesos/hdfs/hadoop-2.7.3/etc/config.tgz > #servedBinaryPath: dist/hadoop-2.6.0.tgz > servedBinaryPath: /home/mesos/mesos/hdfs/binary.tgz > profiles: > zero: # NMs launched with this profile dynamically obtain cpu/mem from > Mesos > cpu: 0 > mem: 0 > small: > cpu: 2 > mem: 2048 > medium: > cpu: 4 > mem: 4096 > large: > cpu: 10 > mem: 12288 > nmInstances: # NMs to start with. Requires at least 1 NM with a non-zero > profile. > medium: 1 # <profile_name : instances> > rebalancer: false > haEnabled: false > nodemanager: > jvmMaxMemoryMB: 1024 > cpus: 0.2 > cgroups: false > executor: > jvmMaxMemoryMB: 256 > path: hdfs://s18:8020/dist/myriad-executor-0.2.0.jar > #path: > file:///home/mesos/mesos/hdfs/hadoop-2.7.3/share/hadoop/yarn/lib/myriad-executor-0.2.0.jar > #The following should be used for a remotely distributed URI, hdfs assumed > but other URI types valid. 
> #nodeManagerUri: hdfs://s18:8020/user/MR/binary.tgz > #configUri: http://s18:8192/api/artifacts/config.tgz > #jvmUri: https://downloads.mycompany.com/java/jre-7u76-linux-x64.tar.gz > yarnEnvironment: > YARN_HOME: /home/mesos/mesos/hdfs/hadoop-2.7.3 > #HADOOP_CONF_DIR=config > #HADOOP_TMP_DIR=$MESOS_SANDBOX > #YARN_HOME: hadoop-2.7.0 #this should be relative if nodeManagerUri is set > #JAVA_HOME: /usr/lib/jvm/java-default #System dependent, but sometimes > necessary > #JAVA_HOME: /home/mesos/jdk/jdk1.8.0_131 > #JAVA_HOME: jre1.7.0_76 # Path to JRE distribution, relative to sandbox > directory > #JAVA_LIBRARY_PATH: /opt/mycompany/lib > #mesosAuthenticationPrincipal: > #mesosAuthenticationSecretFilename: > {code} > yarn-site.xml: > {code:java} > <configuration> > <!-- Site specific YARN configuration properties --> > <property> > <name>yarn.resourcemanager.hostname</name> > <value>s18</value> > </property> > <property> > <name>yarn.nodemanager.aux-services</name> > <value>mapreduce_shuffle</value> > </property> > > <property> > <name>yarn.nodemanager.aux-services</name> > <value>mapreduce_shuffle,myriad_executor</value> > <!-- If using MapR distro, please use the following value: > <value>mapreduce_shuffle,mapr_direct_shuffle,myriad_executor</value> > --> > </property> > <property> > <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> > <value>org.apache.hadoop.mapred.ShuffleHandler</value> > </property> > <property> > <name>yarn.nodemanager.aux-services.myriad_executor.class</name> > <value>org.apache.myriad.executor.MyriadExecutorAuxService</value> > </property> > <property> > <name>yarn.nm.liveness-monitor.expiry-interval-ms</name> > <value>2000</value> > </property> > <property> > <name>yarn.am.liveness-monitor.expiry-interval-ms</name> > <value>10000</value> > </property> > <property> > <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name> > <value>1000</value> > </property> > <!-- (more) Site-specific YARN configuration properties 
--> > <property> > <name>yarn.nodemanager.resource.cpu-vcores</name> > <value>${nodemanager.resource.cpu-vcores}</value> > </property> > <property> > <name>yarn.nodemanager.resource.memory-mb</name> > <value>${nodemanager.resource.memory-mb}</value> > </property> > <!-- Disable pmem/vmem checks needed for FGS and hadoop 2.7.2 --> > <property> > <name>yarn.nodemanager.pmem-check-enabled</name> > <value>false</value> > </property> > <property> > <name>yarn.nodemanager.vmem-check-enabled</name> > <value>false</value> > </property> > > <!-- Dynamic Port Assignment enablement by Mesos --> > <property> > <name>yarn.nodemanager.address</name> > <value>${myriad.yarn.nodemanager.address}</value> > </property> > <property> > <name>yarn.nodemanager.webapp.address</name> > <value>${myriad.yarn.nodemanager.webapp.address}</value> > </property> > <property> > <name>yarn.nodemanager.webapp.https.address</name> > <value>${myriad.yarn.nodemanager.webapp.address}</value> > </property> > <property> > <name>yarn.nodemanager.localizer.address</name> > <value>${myriad.yarn.nodemanager.localizer.address}</value> > </property> > > <!-- Myriad Scheduler configuration --> > <property> > <name>yarn.resourcemanager.scheduler.class</name> > <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value> > </property> > > <!-- Needed for Fine Grain Scaling --> > <property> > <name>yarn.scheduler.minimum-allocation-vcores</name> > <value>0</value> > </property> > <property> > <name>yarn.scheduler.minimum-allocation-mb</name> > <value>0</value> > </property> > </configuration> > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)