[
https://issues.apache.org/jira/browse/MESOS-5799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15364848#comment-15364848
]
Jie Yu commented on MESOS-5799:
-------------------------------
Which Docker version is being used?
> docker::inspect() may get wrong output when a docker container is not in
> "running" state
> ----------------------------------------------------------------------------------------
>
> Key: MESOS-5799
> URL: https://issues.apache.org/jira/browse/MESOS-5799
> Project: Mesos
> Issue Type: Bug
> Components: containerization, docker
> Reporter: Kevin Klues
> Labels: containerizer, docker
> Fix For: 1.0.0
>
>
> I (klueska) am copying the text from an email I got about a bug report from
> Yubo Li at IBM.
> docker::inspect() may get the wrong output when the Docker container is not in
> the "running" state. In this case, "docker inspect" will fail to parse the
> data, and the system cannot enter the TASK_RUNNING status.
> I attached the related logs from stderr, in which I printed the docker inspect
> output. The inspect output shows that the Docker container is in the "created"
> status, not "running", so many of the inspect fields are invalid.
> Possible fix: check the "State->Running" field, and return success when
> "State->Running" is true.
> {noformat}
> I0706 09:01:05.342895 2975 docker.cpp:780] Running docker -H
> unix:///var/run/docker.sock run --cpu-shares 512 --memory 536870912 -e
> MARATHON_APP_VERSION=2016-07-06T08:15:02.610Z -e HOST=9.186.57.67 -e
> MARATHON_APP_RESOURCE_CPUS=0.5 -e MARATHON_APP_RESOURCE_GPUS=1 -e
> MARATHON_APP_DOCKER_IMAGE=cuda_test_v0.1 -e PORT_10000=31435 -e
> MESOS_TASK_ID=ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607 -e
> PORT=31435 -e MARATHON_APP_RESOURCE_MEM=512.0 -e PORTS=31435 -e
> MARATHON_APP_RESOURCE_DISK=0.0 -e MARATHON_APP_LABELS= -e
> MARATHON_APP_ID=/ubuntu-gpu-32520 -e PORT0=31435 -e
> MESOS_SANDBOX=/mnt/mesos/sandbox -e
> MESOS_CONTAINER_NAME=mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439
> -v
> /var/run/mesos/slaves/1875c0d3-9712-43c3-9d58-572c89fac50b-S1/frameworks/aee07017-f8e6-4ed5-8008-b4ea3a090282-0000/executors/ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607/runs/cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439:/mnt/mesos/sandbox
> --net host --device=/dev/nvidiactl:/dev/nvidiactl:rwm
> --device=/dev/nvidia-uvm:/dev/nvidia-uvm:rwm
> --device=/dev/nvidia0:/dev/nvidia0:rwm --entrypoint /bin/sh --name
> mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439
> cuda_test_v0.1 -c nvidia-smi && sleep 60s
> I0706 09:01:05.345935 2975 docker.cpp:943] Running docker -H
> unix:///var/run/docker.sock inspect
> mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439
> I0706 09:01:05.548992 2976 docker.cpp:249] Docker inspect: [
> {
> "Id": "5a4dc17e739b60593c04abf310f2485dddea832476e83007387b612839933f5a",
> "Created": "2016-07-06T09:01:05.531216924Z",
> "Path": "/bin/sh",
> "Args": [
> "-c",
> "nvidia-smi \u0026\u0026 sleep 60s"
> ],
> "State": {
> "Status": "created",
> "Running": false,
> "Paused": false,
> "Restarting": false,
> "OOMKilled": false,
> "Dead": false,
> "Pid": 0,
> "ExitCode": 0,
> "Error": "",
> "StartedAt": "0001-01-01T00:00:00Z",
> "FinishedAt": "0001-01-01T00:00:00Z"
> },
> "Image":
> "8cf6c8da7045ec24b1e561906dfa54ab0276753ec617e139a7b2da3ef72d245e",
> "ResolvConfPath": "",
> "HostnamePath": "",
> "HostsPath": "",
> "LogPath": "",
> "Name":
> "/mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439",
> "RestartCount": 0,
> "Driver": "aufs",
> "ExecDriver": "native-0.2",
> "MountLabel": "",
> "ProcessLabel": "",
> "AppArmorProfile": "",
> "ExecIDs": null,
> "HostConfig": {
> "Binds": null,
> "ContainerIDFile": "",
> "LxcConf": null,
> "Memory": 0,
> "MemoryReservation": 0,
> "MemorySwap": 0,
> "KernelMemory": 0,
> "CpuShares": 0,
> "CpuPeriod": 0,
> "CpusetCpus": "",
> "CpusetMems": "",
> "CpuQuota": 0,
> "BlkioWeight": 0,
> "OomKillDisable": false,
> "MemorySwappiness": null,
> "Privileged": false,
> "PortBindings": null,
> "Links": null,
> "PublishAllPorts": false,
> "Dns": null,
> "DnsOptions": null,
> "DnsSearch": null,
> "ExtraHosts": null,
> "VolumesFrom": null,
> "Devices": null,
> "NetworkMode": "",
> "IpcMode": "",
> "PidMode": "",
> "UTSMode": "",
> "CapAdd": null,
> "CapDrop": null,
> "GroupAdd": null,
> "RestartPolicy": {
> "Name": "",
> "MaximumRetryCount": 0
> },
> "SecurityOpt": null,
> "ReadonlyRootfs": false,
> "Ulimits": null,
> "LogConfig": {
> "Type": "json-file",
> "Config": {}
> },
> "CgroupParent": "",
> "ConsoleSize": [
> 0,
> 0
> ],
> "VolumeDriver": ""
> },
> "GraphDriver": {
> "Name": "aufs",
> "Data": null
> },
> "Mounts": [],
> "Config": {
> "Hostname": "5a4dc17e739b",
> "Domainname": "",
> "User": "",
> "AttachStdin": false,
> "AttachStdout": true,
> "AttachStderr": true,
> "Tty": false,
> "OpenStdin": false,
> "StdinOnce": false,
> "Env": [
> "MARATHON_APP_VERSION=2016-07-06T08:15:02.610Z",
> "HOST=9.186.57.67",
> "MARATHON_APP_RESOURCE_CPUS=0.5",
> "MARATHON_APP_RESOURCE_GPUS=1",
> "MARATHON_APP_DOCKER_IMAGE=cuda_test_v0.1",
> "PORT_10000=31435",
>
> "MESOS_TASK_ID=ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607",
> "PORT=31435",
> "MARATHON_APP_RESOURCE_MEM=512.0",
> "PORTS=31435",
> "MARATHON_APP_RESOURCE_DISK=0.0",
> "MARATHON_APP_LABELS=",
> "MARATHON_APP_ID=/ubuntu-gpu-32520",
> "PORT0=31435",
> "MESOS_SANDBOX=/mnt/mesos/sandbox",
>
> "MESOS_CONTAINER_NAME=mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439",
>
> "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
> ],
> "Cmd": [
> "-c",
> "nvidia-smi \u0026\u0026 sleep 60s"
> ],
> "Image": "cuda_test_v0.1",
> "Volumes": null,
> "WorkingDir": "",
> "Entrypoint": [
> "/bin/sh"
> ],
> "OnBuild": null,
> "Labels": {},
> "StopSignal": "SIGTERM"
> },
> "NetworkSettings": {
> "Bridge": "",
> "SandboxID": "",
> "HairpinMode": false,
> "LinkLocalIPv6Address": "",
> "LinkLocalIPv6PrefixLen": 0,
> "Ports": null,
> "SandboxKey": "",
> "SecondaryIPAddresses": null,
> "SecondaryIPv6Addresses": null,
> "EndpointID": "",
> "Gateway": "",
> "GlobalIPv6Address": "",
> "GlobalIPv6PrefixLen": 0,
> "IPAddress": "",
> "IPPrefixLen": 0,
> "IPv6Gateway": "",
> "MacAddress": "",
> "Networks": null
> }
> }
> ]
> I0706 09:01:05.549659 2976 docker.cpp:335] Unable to detect IP Address at
> 'NetworkSettings.Networks..IPAddress', attempting deprecated field
> WARNING: Your kernel does not support swap limit capabilities, memory limited
> without swap.
> I0706 09:01:52.983609 2973 exec.cpp:486] Agent exited, but framework has
> checkpointing enabled. Waiting 15mins to reconnect with agent
> 1875c0d3-9712-43c3-9d58-572c89fac50b-S1
> I0706 09:02:06.057607 2978 exec.cpp:549] Executor sending status update
> TASK_FINISHED (UUID: 2cff35f2-9512-4120-b912-74a82c197696) for task
> ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607 of framework
> aee07017-f8e6-4ed5-8008-b4ea3a090282-0000
> I0706 09:02:06.058717 2980 poll_socket.cpp:131] Socket error while connecting
> I0706 09:02:06.058815 2980 process.cpp:1799] Failed to send
> 'mesos.internal.StatusUpdateMessage' to '127.0.1.1:5051', connect: Socket
> error while connecting
> E0706 09:02:06.058931 2980 process.cpp:2104] Failed to shutdown socket with
> fd 6: Transport endpoint is not connected
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)