[ https://issues.apache.org/jira/browse/MYRIAD-255?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Liu You updated MYRIAD-255: --------------------------- Description: I can't run a YARN application, and the job gets stuck every time I execute the command 'hadoop jar xxx.....'. The Myriad Web UI shows that the status of the task changes from staging to pending, then staging, then pending..... There are some screenshots in the attachments below. Please help me. Thank you very much. yarn-mesos-resourcemanager-s18.log: {code:java} 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 8032: starting 2017-07-20 17:36:46,955 INFO org.apache.myriad.scheduler.event.handlers.ExecutorLostEventHandler: Executor value: "myriad_executor5b12e719-6990-4546-9370-b984fc35b759-00005b12e719-6990-4546-9370-b984fc35b759-O08e5e7115-060b-42c6-b24f-64642048cfa1-S0" of slave value: "8e5e7115-060b-42c6-b24f-64642048cfa1-S0" lost with exit status: 9 2017-07-20 17:36:46,969 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Status Update for task: nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898 | state: TASK_FAILED 2017-07-20 17:36:46,970 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioned to active state 2017-07-20 17:36:46,971 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Marked as pending failed task with id value: "nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898" {code} myriad-config-default.yml: {code:java} mesosMaster: 192.168.130.18:5050 checkpoint: false frameworkFailoverTimeout: 43200000 frameworkName: MyriadAlpha frameworkRole: "yarn" frameworkUser: mesos # User the Node Manager runs as, required if nodeManagerURI set, otherwise defaults to the user # running the resource manager. frameworkSuperUser: root # To be deprecated, currently permissions need set by a superuser due to Mesos-1790. Must be # root or have passwordless sudo. Required if nodeManagerURI set, ignored otherwise. 
#nativeLibrary: /usr/local/lib/libmesos.so nativeLibrary: /home/mesos/mesos/mesos-1.2.0/mesos_install/lib/libmesos.so zkServers: 192.168.130.18:2181 zkTimeout: 20000 restApiPort: 8192 #servedConfigPath: dist/config.tgz servedConfigPath: /home/mesos/mesos/hdfs/hadoop-2.7.3/etc/config.tgz #servedBinaryPath: dist/hadoop-2.6.0.tgz servedBinaryPath: /home/mesos/mesos/hdfs/binary.tgz profiles: zero: # NMs launched with this profile dynamically obtain cpu/mem from Mesos cpu: 0 mem: 0 small: cpu: 2 mem: 2048 medium: cpu: 4 mem: 4096 large: cpu: 10 mem: 12288 nmInstances: # NMs to start with. Requires at least 1 NM with a non-zero profile. medium: 1 # <profile_name : instances> rebalancer: false haEnabled: false nodemanager: jvmMaxMemoryMB: 1024 cpus: 0.2 cgroups: false executor: jvmMaxMemoryMB: 256 path: hdfs://s18:8020/dist/myriad-executor-0.2.0.jar #path: file:///home/mesos/mesos/hdfs/hadoop-2.7.3/share/hadoop/yarn/lib/myriad-executor-0.2.0.jar #The following should be used for a remotely distributed URI, hdfs assumed but other URI types valid. 
#nodeManagerUri: hdfs://s18:8020/user/MR/binary.tgz #configUri: http://s18:8192/api/artifacts/config.tgz #jvmUri: https://downloads.mycompany.com/java/jre-7u76-linux-x64.tar.gz yarnEnvironment: YARN_HOME: /home/mesos/mesos/hdfs/hadoop-2.7.3 #HADOOP_CONF_DIR=config #HADOOP_TMP_DIR=$MESOS_SANDBOX #YARN_HOME: hadoop-2.7.0 #this should be relative if nodeManagerUri is set #JAVA_HOME: /usr/lib/jvm/java-default #System dependent, but sometimes necessary #JAVA_HOME: /home/mesos/jdk/jdk1.8.0_131 #JAVA_HOME: jre1.7.0_76 # Path to JRE distribution, relative to sandbox directory #JAVA_LIBRARY_PATH: /opt/mycompany/lib #mesosAuthenticationPrincipal: #mesosAuthenticationSecretFilename: {code} yarn-site.xml: {code:java} <configuration> <!-- Site specific YARN configuration properties --> <property> <name>yarn.resourcemanager.hostname</name> <value>s18</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle,myriad_executor</value> <!-- If using MapR distro, please use the following value: <value>mapreduce_shuffle,mapr_direct_shuffle,myriad_executor</value> --> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.nodemanager.aux-services.myriad_executor.class</name> <value>org.apache.myriad.executor.MyriadExecutorAuxService</value> </property> <property> <name>yarn.nm.liveness-monitor.expiry-interval-ms</name> <value>2000</value> </property> <property> <name>yarn.am.liveness-monitor.expiry-interval-ms</name> <value>10000</value> </property> <property> <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name> <value>1000</value> </property> <!-- (more) Site-specific YARN configuration properties --> <property> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>${nodemanager.resource.cpu-vcores}</value> 
</property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>${nodemanager.resource.memory-mb}</value> </property> <!-- Disable pmem/vmem checks needed for FGS and hadoop 2.7.2 --> <property> <name>yarn.nodemanager.pmem-check-enabled</name> <value>false</value> </property> <property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property> <!-- Dynamic Port Assignment enablement by Mesos --> <property> <name>yarn.nodemanager.address</name> <value>${myriad.yarn.nodemanager.address}</value> </property> <property> <name>yarn.nodemanager.webapp.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.webapp.https.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.localizer.address</name> <value>${myriad.yarn.nodemanager.localizer.address}</value> </property> <!-- Myriad Scheduler configuration --> <property> <name>yarn.resourcemanager.scheduler.class</name> <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value> </property> <!-- Needed for Fine Grain Scaling --> <property> <name>yarn.scheduler.minimum-allocation-vcores</name> <value>0</value> </property> <property> <name>yarn.scheduler.minimum-allocation-mb</name> <value>0</value> </property> </configuration> {code} was: I can't run yarn application, and the job gets stucked every time I execute the command 'hadoop jar xxx.....'. Please help me. Thank you very much. The Myriad Web UI shows that the status of the task changes from staging to pending, then staging, then pending..... There are some screenshots in the attachments below. 
yarn-mesos-resourcemanager-s18.log: {code:java} 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 8032: starting 2017-07-20 17:36:46,955 INFO org.apache.myriad.scheduler.event.handlers.ExecutorLostEventHandler: Executor value: "myriad_executor5b12e719-6990-4546-9370-b984fc35b759-00005b12e719-6990-4546-9370-b984fc35b759-O08e5e7115-060b-42c6-b24f-64642048cfa1-S0" of slave value: "8e5e7115-060b-42c6-b24f-64642048cfa1-S0" lost with exit status: 9 2017-07-20 17:36:46,969 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Status Update for task: nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898 | state: TASK_FAILED 2017-07-20 17:36:46,970 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioned to active state 2017-07-20 17:36:46,971 INFO org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Marked as pending failed task with id value: "nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898" {code} myriad-config-default.yml: {code:java} mesosMaster: 192.168.130.18:5050 checkpoint: false frameworkFailoverTimeout: 43200000 frameworkName: MyriadAlpha frameworkRole: "yarn" frameworkUser: mesos # User the Node Manager runs as, required if nodeManagerURI set, otherwise defaults to the user # running the resource manager. frameworkSuperUser: root # To be deprecated, currently permissions need set by a superuser due to Mesos-1790. Must be # root or have passwordless sudo. Required if nodeManagerURI set, ignored otherwise. 
#nativeLibrary: /usr/local/lib/libmesos.so nativeLibrary: /home/mesos/mesos/mesos-1.2.0/mesos_install/lib/libmesos.so zkServers: 192.168.130.18:2181 zkTimeout: 20000 restApiPort: 8192 #servedConfigPath: dist/config.tgz servedConfigPath: /home/mesos/mesos/hdfs/hadoop-2.7.3/etc/config.tgz #servedBinaryPath: dist/hadoop-2.6.0.tgz servedBinaryPath: /home/mesos/mesos/hdfs/binary.tgz profiles: zero: # NMs launched with this profile dynamically obtain cpu/mem from Mesos cpu: 0 mem: 0 small: cpu: 2 mem: 2048 medium: cpu: 4 mem: 4096 large: cpu: 10 mem: 12288 nmInstances: # NMs to start with. Requires at least 1 NM with a non-zero profile. medium: 1 # <profile_name : instances> rebalancer: false haEnabled: false nodemanager: jvmMaxMemoryMB: 1024 cpus: 0.2 cgroups: false executor: jvmMaxMemoryMB: 256 path: hdfs://s18:8020/dist/myriad-executor-0.2.0.jar #path: file:///home/mesos/mesos/hdfs/hadoop-2.7.3/share/hadoop/yarn/lib/myriad-executor-0.2.0.jar #The following should be used for a remotely distributed URI, hdfs assumed but other URI types valid. 
#nodeManagerUri: hdfs://s18:8020/user/MR/binary.tgz #configUri: http://s18:8192/api/artifacts/config.tgz #jvmUri: https://downloads.mycompany.com/java/jre-7u76-linux-x64.tar.gz yarnEnvironment: YARN_HOME: /home/mesos/mesos/hdfs/hadoop-2.7.3 #HADOOP_CONF_DIR=config #HADOOP_TMP_DIR=$MESOS_SANDBOX #YARN_HOME: hadoop-2.7.0 #this should be relative if nodeManagerUri is set #JAVA_HOME: /usr/lib/jvm/java-default #System dependent, but sometimes necessary #JAVA_HOME: /home/mesos/jdk/jdk1.8.0_131 #JAVA_HOME: jre1.7.0_76 # Path to JRE distribution, relative to sandbox directory #JAVA_LIBRARY_PATH: /opt/mycompany/lib #mesosAuthenticationPrincipal: #mesosAuthenticationSecretFilename: {code} yarn-site.xml: {code:java} <configuration> <!-- Site specific YARN configuration properties --> <property> <name>yarn.resourcemanager.hostname</name> <value>s18</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle,myriad_executor</value> <!-- If using MapR distro, please use the following value: <value>mapreduce_shuffle,mapr_direct_shuffle,myriad_executor</value> --> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.nodemanager.aux-services.myriad_executor.class</name> <value>org.apache.myriad.executor.MyriadExecutorAuxService</value> </property> <property> <name>yarn.nm.liveness-monitor.expiry-interval-ms</name> <value>2000</value> </property> <property> <name>yarn.am.liveness-monitor.expiry-interval-ms</name> <value>10000</value> </property> <property> <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name> <value>1000</value> </property> <!-- (more) Site-specific YARN configuration properties --> <property> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>${nodemanager.resource.cpu-vcores}</value> 
</property> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>${nodemanager.resource.memory-mb}</value> </property> <!-- Disable pmem/vmem checks needed for FGS and hadoop 2.7.2 --> <property> <name>yarn.nodemanager.pmem-check-enabled</name> <value>false</value> </property> <property> <name>yarn.nodemanager.vmem-check-enabled</name> <value>false</value> </property> <!-- Dynamic Port Assignment enablement by Mesos --> <property> <name>yarn.nodemanager.address</name> <value>${myriad.yarn.nodemanager.address}</value> </property> <property> <name>yarn.nodemanager.webapp.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.webapp.https.address</name> <value>${myriad.yarn.nodemanager.webapp.address}</value> </property> <property> <name>yarn.nodemanager.localizer.address</name> <value>${myriad.yarn.nodemanager.localizer.address}</value> </property> <!-- Myriad Scheduler configuration --> <property> <name>yarn.resourcemanager.scheduler.class</name> <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value> </property> <!-- Needed for Fine Grain Scaling --> <property> <name>yarn.scheduler.minimum-allocation-vcores</name> <value>0</value> </property> <property> <name>yarn.scheduler.minimum-allocation-mb</name> <value>0</value> </property> </configuration> {code} > unable to run yarn application with hadoop2.7.3 and mesos1.2.0 > -------------------------------------------------------------- > > Key: MYRIAD-255 > URL: https://issues.apache.org/jira/browse/MYRIAD-255 > Project: Myriad > Issue Type: Bug > Components: Executor, Scheduler > Affects Versions: Myriad 0.2.0 > Environment: Ubuntu 14.04.3 LTS > master hostname: s18 > slaves hostname: s19, s20, s21 > hadoop 2.7.3 > mesos 1.2.0 > Reporter: Liu You > Attachments: mapred-site.xml, > mesos-master.s18.mesos.log.INFO.20170720-173553.21812, > mesos-master.s18.mesos.log.WARNING.20170720-173553.21812, Mesos Web UI.png, > 
myriad-config-default.yml, myriad ui-1.png, myriad ui-2.png, stucked job.png, > yarn-mesos-resourcemanager-s18.log, yarn-mesos-resourcemanager-s18.out, > yarn-site.xml > > > I can't run a YARN application, and the job gets stuck every time I execute > the command 'hadoop jar xxx.....'. > The Myriad Web UI shows that the status of the task changes from staging to > pending, then staging, then pending..... > There are some screenshots in the attachments below. > Please help me. Thank you very much. > yarn-mesos-resourcemanager-s18.log: > {code:java} > 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server > Responder: starting > 2017-07-20 17:36:46,941 INFO org.apache.hadoop.ipc.Server: IPC Server > listener on 8032: starting > 2017-07-20 17:36:46,955 INFO > org.apache.myriad.scheduler.event.handlers.ExecutorLostEventHandler: Executor > value: > "myriad_executor5b12e719-6990-4546-9370-b984fc35b759-00005b12e719-6990-4546-9370-b984fc35b759-O08e5e7115-060b-42c6-b24f-64642048cfa1-S0" > of slave value: "8e5e7115-060b-42c6-b24f-64642048cfa1-S0" > lost with exit status: 9 > 2017-07-20 17:36:46,969 INFO > org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Status > Update for task: nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898 | state: > TASK_FAILED > 2017-07-20 17:36:46,970 INFO > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioned > to active state > 2017-07-20 17:36:46,971 INFO > org.apache.myriad.scheduler.event.handlers.StatusUpdateEventHandler: Marked > as pending failed task with id value: > "nm.medium.ddd2cdab-db6b-4dbc-9d5d-8c339c056898" > {code} > myriad-config-default.yml: > {code:java} > mesosMaster: 192.168.130.18:5050 > checkpoint: false > frameworkFailoverTimeout: 43200000 > frameworkName: MyriadAlpha > frameworkRole: "yarn" > frameworkUser: mesos # User the Node Manager runs as, required if > nodeManagerURI set, otherwise defaults to the user > # running the resource manager. 
> frameworkSuperUser: root # To be deprecated, currently permissions need set > by a superuser due to Mesos-1790. Must be > # root or have passwordless sudo. Required if > nodeManagerURI set, ignored otherwise. > #nativeLibrary: /usr/local/lib/libmesos.so > nativeLibrary: /home/mesos/mesos/mesos-1.2.0/mesos_install/lib/libmesos.so > zkServers: 192.168.130.18:2181 > zkTimeout: 20000 > restApiPort: 8192 > #servedConfigPath: dist/config.tgz > servedConfigPath: /home/mesos/mesos/hdfs/hadoop-2.7.3/etc/config.tgz > #servedBinaryPath: dist/hadoop-2.6.0.tgz > servedBinaryPath: /home/mesos/mesos/hdfs/binary.tgz > profiles: > zero: # NMs launched with this profile dynamically obtain cpu/mem from > Mesos > cpu: 0 > mem: 0 > small: > cpu: 2 > mem: 2048 > medium: > cpu: 4 > mem: 4096 > large: > cpu: 10 > mem: 12288 > nmInstances: # NMs to start with. Requires at least 1 NM with a non-zero > profile. > medium: 1 # <profile_name : instances> > rebalancer: false > haEnabled: false > nodemanager: > jvmMaxMemoryMB: 1024 > cpus: 0.2 > cgroups: false > executor: > jvmMaxMemoryMB: 256 > path: hdfs://s18:8020/dist/myriad-executor-0.2.0.jar > #path: > file:///home/mesos/mesos/hdfs/hadoop-2.7.3/share/hadoop/yarn/lib/myriad-executor-0.2.0.jar > #The following should be used for a remotely distributed URI, hdfs assumed > but other URI types valid. 
> #nodeManagerUri: hdfs://s18:8020/user/MR/binary.tgz > #configUri: http://s18:8192/api/artifacts/config.tgz > #jvmUri: https://downloads.mycompany.com/java/jre-7u76-linux-x64.tar.gz > yarnEnvironment: > YARN_HOME: /home/mesos/mesos/hdfs/hadoop-2.7.3 > #HADOOP_CONF_DIR=config > #HADOOP_TMP_DIR=$MESOS_SANDBOX > #YARN_HOME: hadoop-2.7.0 #this should be relative if nodeManagerUri is set > #JAVA_HOME: /usr/lib/jvm/java-default #System dependent, but sometimes > necessary > #JAVA_HOME: /home/mesos/jdk/jdk1.8.0_131 > #JAVA_HOME: jre1.7.0_76 # Path to JRE distribution, relative to sandbox > directory > #JAVA_LIBRARY_PATH: /opt/mycompany/lib > #mesosAuthenticationPrincipal: > #mesosAuthenticationSecretFilename: > {code} > yarn-site.xml: > {code:java} > <configuration> > <!-- Site specific YARN configuration properties --> > <property> > <name>yarn.resourcemanager.hostname</name> > <value>s18</value> > </property> > <property> > <name>yarn.nodemanager.aux-services</name> > <value>mapreduce_shuffle</value> > </property> > > <property> > <name>yarn.nodemanager.aux-services</name> > <value>mapreduce_shuffle,myriad_executor</value> > <!-- If using MapR distro, please use the following value: > <value>mapreduce_shuffle,mapr_direct_shuffle,myriad_executor</value> > --> > </property> > <property> > <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> > <value>org.apache.hadoop.mapred.ShuffleHandler</value> > </property> > <property> > <name>yarn.nodemanager.aux-services.myriad_executor.class</name> > <value>org.apache.myriad.executor.MyriadExecutorAuxService</value> > </property> > <property> > <name>yarn.nm.liveness-monitor.expiry-interval-ms</name> > <value>2000</value> > </property> > <property> > <name>yarn.am.liveness-monitor.expiry-interval-ms</name> > <value>10000</value> > </property> > <property> > <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name> > <value>1000</value> > </property> > <!-- (more) Site-specific YARN configuration properties 
--> > <property> > <name>yarn.nodemanager.resource.cpu-vcores</name> > <value>${nodemanager.resource.cpu-vcores}</value> > </property> > <property> > <name>yarn.nodemanager.resource.memory-mb</name> > <value>${nodemanager.resource.memory-mb}</value> > </property> > <!-- Disable pmem/vmem checks needed for FGS and hadoop 2.7.2 --> > <property> > <name>yarn.nodemanager.pmem-check-enabled</name> > <value>false</value> > </property> > <property> > <name>yarn.nodemanager.vmem-check-enabled</name> > <value>false</value> > </property> > > <!-- Dynamic Port Assignment enablement by Mesos --> > <property> > <name>yarn.nodemanager.address</name> > <value>${myriad.yarn.nodemanager.address}</value> > </property> > <property> > <name>yarn.nodemanager.webapp.address</name> > <value>${myriad.yarn.nodemanager.webapp.address}</value> > </property> > <property> > <name>yarn.nodemanager.webapp.https.address</name> > <value>${myriad.yarn.nodemanager.webapp.address}</value> > </property> > <property> > <name>yarn.nodemanager.localizer.address</name> > <value>${myriad.yarn.nodemanager.localizer.address}</value> > </property> > > <!-- Myriad Scheduler configuration --> > <property> > <name>yarn.resourcemanager.scheduler.class</name> > <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value> > </property> > > <!-- Needed for Fine Grain Scaling --> > <property> > <name>yarn.scheduler.minimum-allocation-vcores</name> > <value>0</value> > </property> > <property> > <name>yarn.scheduler.minimum-allocation-mb</name> > <value>0</value> > </property> > </configuration> > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)