[
https://issues.apache.org/jira/browse/MESOS-4869?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15193774#comment-15193774
]
Anthony Scalisi edited comment on MESOS-4869 at 3/14/16 6:06 PM:
-----------------------------------------------------------------
What do you mean? Without having Mesos doing the health checks, on a host with
6 tasks for example:
{noformat}
scalp@mesos-slave-i-d00b6017 $ free -m
total used free shared buffers cached
Mem: 16047 15306 740 0 3174 2547
-/+ buffers/cache: 9583 6463
Swap: 0 0 0
root@mesos-slave-i-d00b6017 # docker stats --no-stream
CONTAINER CPU % MEM USAGE / LIMIT MEM %
NET I/O BLOCK I/O
33cb349404e1 3.23% 897.8 MB / 1.611 GB 55.74%
4.859 GB / 4.625 GB 53.25 kB / 61.44 kB
61eba49cf71d 3.22% 1.166 GB / 1.611 GB 72.41%
5.49 GB / 5.155 GB 106.5 kB / 118.8 kB
630739e12032 3.76% 1.163 GB / 1.611 GB 72.22%
3.891 GB / 3.657 GB 348.2 kB / 118.8 kB
b5b9da9facfb 2.84% 901.9 MB / 1.611 GB 55.99%
2.254 GB / 2.153 GB 0 B / 118.8 kB
dcd2a73f71a9 3.55% 1.29 GB / 1.611 GB 80.10%
2.726 GB / 2.672 GB 0 B / 118.8 kB
de923d88a781 3.17% 889.5 MB / 1.611 GB 55.23%
3.817 GB / 3.645 GB 36.86 kB / 61.44 kB
{noformat}
Or another with 11 tasks:
{noformat}
root@mesos-slave-i-0fe036d7 # free -m
total used free shared buffers cached
Mem: 16047 15189 857 0 1347 688
-/+ buffers/cache: 13153 2893
Swap: 0
root@mesos-slave-i-0fe036d7 # docker stats --no-stream
CONTAINER CPU % MEM USAGE / LIMIT MEM %
NET I/O BLOCK I/O
1527ccec3562 0.39% 46.75 MB / 134.2 MB 34.83%
318.5 MB / 283.5 MB 634.9 kB / 0 B
16c0afe372f1 3.12% 1.139 GB / 1.611 GB 70.69%
5.443 GB / 5.139 GB 1.757 MB / 118.8 kB
2aaac6a34f3b 3.50% 1.34 GB / 1.611 GB 83.18%
9.928 GB / 9.006 GB 2.646 MB / 118.8 kB
4bda58242e66 2.57% 875.5 MB / 1.611 GB 54.36%
4.853 GB / 4.632 GB 135.2 kB / 61.44 kB
67ed575e6f44 2.14% 1.171 GB / 1.611 GB 72.73%
3.878 GB / 3.664 GB 4.739 MB / 118.8 kB
87010c4fa547 4.23% 1.208 GB / 1.611 GB 74.99%
313.5 MB / 419.1 MB 213 kB / 94.21 kB
8ca7c160b196 1.73% 730.4 MB / 1.611 GB 45.35%
305.6 MB / 447.7 MB 0 B / 61.44 kB
cbac44b2663c 4.66% 1.088 GB / 1.611 GB 67.53%
16.48 GB / 14.91 GB 262.1 kB / 61.44 kB
d0fe165aecac 3.02% 901.2 MB / 1.611 GB 55.95%
1.573 GB / 1.555 GB 106.5 kB / 61.44 kB
df668f59a149 3.57% 1.143 GB / 1.611 GB 70.98%
2.732 GB / 2.681 GB 1.888 MB / 118.8 kB
e0fc97fa33cf 3.43% 1.034 GB / 1.611 GB 64.21%
3.823 GB / 3.655 GB 2.433 MB / 61.44 kB
{noformat}
If you were referring to the actual Mesos processes:
{noformat}
root@mesos-slave-i-0fe036d7 # ps awwuxf | egrep "mesos-docker|mesos-slave" |
egrep -v "grep|node"
root 27470 0.3 0.3 962568 51020 ? Ssl Mar11 14:46
/usr/sbin/mesos-slave
--master=zk://10.92.21.247:2181,10.92.31.170:2181,10.92.41.178:2181/mesos
--log_dir=/var/log/mesos --containerizers=docker,mesos
--docker_stop_timeout=30secs --executor_registration_timeout=5mins
--executor_shutdown_grace_period=90secs --gc_delay=1weeks
--hostname=mesos-slave-i-0fe036d7.example.com --ip=10.92.22.241
--isolation=cgroups/cpu,cgroups/mem --logbufsecs=1 --recover=reconnect
--strict=false --work_dir=/opt/mesos --attributes=az:us-west-2a
--resources=cpus:4;mem:16047;ports:[31000-32000]
root 27511 0.0 0.0 5916 596 ? S Mar11 0:00 \_ logger -p
user.info -t mesos-slave[27470]
root 27512 0.0 0.0 5916 1884 ? S Mar11 0:00 \_ logger -p
user.err -t mesos-slave[27470]
root 28907 0.1 0.0 802068 5360 ? Ssl Mar11 7:02 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.f552977a-040c-41a2-bb60-0e441c6491ef
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_metric-green.cac70614-e7d1-11e5-a617-02429957d388/runs/f552977a-040c-41a2-bb60-0e441c6491ef
--stop_timeout=30secs
root 29193 0.1 0.0 802596 5816 ? Ssl Mar11 7:02 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.5ada3858-b09b-4a5e-a320-b3c66bb237a6
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_push-green.cac70613-e7d1-11e5-a617-02429957d388/runs/5ada3858-b09b-4a5e-a320-b3c66bb237a6
--stop_timeout=30secs
root 29373 0.1 0.0 802596 5172 ? Ssl Mar11 7:00 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.2a93bbbb-0daa-4f22-bbb8-aa7a92791918
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_email-green.cac6b7f0-e7d1-11e5-a617-02429957d388/runs/2a93bbbb-0daa-4f22-bbb8-aa7a92791918
--stop_timeout=30secs
root 29538 0.1 0.0 802068 5768 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.c7aa0614-8afa-404a-a6a7-a591f3f20371
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_search-green.cac6df01-e7d1-11e5-a617-02429957d388/runs/c7aa0614-8afa-404a-a6a7-a591f3f20371
--stop_timeout=30secs
root 30831 0.1 0.0 802068 6012 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.2c086ae3-c0b9-4069-983a-d8efc37ff220
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_user-green.6c806415-e7d2-11e5-a617-02429957d388/runs/2c086ae3-c0b9-4069-983a-d8efc37ff220
--stop_timeout=30secs
root 30989 0.1 0.0 802068 5508 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.79d0c2bd-c3d5-4e47-a7de-74ce713dd6cf
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_identity-green.6d185e58-e7d2-11e5-a617-02429957d388/runs/79d0c2bd-c3d5-4e47-a7de-74ce713dd6cf
--stop_timeout=30secs
root 31132 0.1 0.0 802068 5612 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.5e370455-97fc-476c-9cc4-c300c472a002
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_integration-green.6d185e57-e7d2-11e5-a617-02429957d388/runs/5e370455-97fc-476c-9cc4-c300c472a002
--stop_timeout=30secs
root 31292 0.1 0.0 802596 5688 ? Ssl Mar11 7:00 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.ff592e36-b9da-48e6-9d6c-960edba25050
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_media-green.6d183746-e7d2-11e5-a617-02429957d388/runs/ff592e36-b9da-48e6-9d6c-960edba25050
--stop_timeout=30secs
root 2292 0.1 0.0 802068 5388 ? Ssl Mar11 6:57 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.7caeae52-f0f3-43b9-b89a-fee798841757
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_chat-green.037a7cc1-e7d4-11e5-a617-02429957d388/runs/7caeae52-f0f3-43b9-b89a-fee798841757
--stop_timeout=30secs
root 2068 0.1 0.0 802068 5216 ? Ssl Mar12 6:31 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.9bd3e685-8ed1-442c-84bb-7e8c0a37acfe
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_notification-green.383537a4-e7fa-11e5-a617-02429957d388/runs/9bd3e685-8ed1-442c-84bb-7e8c0a37acfe
--stop_timeout=30secs
{noformat}
was (Author: scalp42):
What do you mean? Without having Mesos doing the health checks, on a host with
6 tasks for example:
{noformat}
scalp@mesos-slave-i-d00b6017 $ free -m
total used free shared buffers cached
Mem: 16047 15306 740 0 3174 2547
-/+ buffers/cache: 9583 6463
Swap: 0 0 0
root@mesos-slave-i-d00b6017 # docker stats --no-stream
CONTAINER CPU % MEM USAGE / LIMIT MEM %
NET I/O BLOCK I/O
33cb349404e1 3.23% 897.8 MB / 1.611 GB 55.74%
4.859 GB / 4.625 GB 53.25 kB / 61.44 kB
61eba49cf71d 3.22% 1.166 GB / 1.611 GB 72.41%
5.49 GB / 5.155 GB 106.5 kB / 118.8 kB
630739e12032 3.76% 1.163 GB / 1.611 GB 72.22%
3.891 GB / 3.657 GB 348.2 kB / 118.8 kB
b5b9da9facfb 2.84% 901.9 MB / 1.611 GB 55.99%
2.254 GB / 2.153 GB 0 B / 118.8 kB
dcd2a73f71a9 3.55% 1.29 GB / 1.611 GB 80.10%
2.726 GB / 2.672 GB 0 B / 118.8 kB
de923d88a781 3.17% 889.5 MB / 1.611 GB 55.23%
3.817 GB / 3.645 GB 36.86 kB / 61.44 kB
{noformat}
Or another with 11 tasks:
{noformat}
root@mesos-slave-i-0fe036d7 # free -m
total used free shared buffers cached
Mem: 16047 15189 857 0 1347 688
-/+ buffers/cache: 13153 2893
Swap: 0
root@mesos-slave-i-0fe036d7 # docker stats --no-stream
CONTAINER CPU % MEM USAGE / LIMIT MEM %
NET I/O BLOCK I/O
1527ccec3562 0.39% 46.75 MB / 134.2 MB 34.83%
318.5 MB / 283.5 MB 634.9 kB / 0 B
16c0afe372f1 3.12% 1.139 GB / 1.611 GB 70.69%
5.443 GB / 5.139 GB 1.757 MB / 118.8 kB
2aaac6a34f3b 3.50% 1.34 GB / 1.611 GB 83.18%
9.928 GB / 9.006 GB 2.646 MB / 118.8 kB
4bda58242e66 2.57% 875.5 MB / 1.611 GB 54.36%
4.853 GB / 4.632 GB 135.2 kB / 61.44 kB
67ed575e6f44 2.14% 1.171 GB / 1.611 GB 72.73%
3.878 GB / 3.664 GB 4.739 MB / 118.8 kB
87010c4fa547 4.23% 1.208 GB / 1.611 GB 74.99%
313.5 MB / 419.1 MB 213 kB / 94.21 kB
8ca7c160b196 1.73% 730.4 MB / 1.611 GB 45.35%
305.6 MB / 447.7 MB 0 B / 61.44 kB
cbac44b2663c 4.66% 1.088 GB / 1.611 GB 67.53%
16.48 GB / 14.91 GB 262.1 kB / 61.44 kB
d0fe165aecac 3.02% 901.2 MB / 1.611 GB 55.95%
1.573 GB / 1.555 GB 106.5 kB / 61.44 kB
df668f59a149 3.57% 1.143 GB / 1.611 GB 70.98%
2.732 GB / 2.681 GB 1.888 MB / 118.8 kB
e0fc97fa33cf 3.43% 1.034 GB / 1.611 GB 64.21%
3.823 GB / 3.655 GB 2.433 MB / 61.44 kB
{noformat}
If you were referring to the actual Mesos processes:
{noformat}
root@mesos-slave-i-0fe036d7 # ps awwuxf | egrep "mesos-docker|mesos-slave" |
egrep -v "grep|node"
root 27470 0.3 0.3 962568 51020 ? Ssl Mar11 14:46
/usr/sbin/mesos-slave
--master=zk://10.92.21.247:2181,10.92.31.170:2181,10.92.41.178:2181/mesos
--log_dir=/var/log/mesos --containerizers=docker,mesos
--docker_stop_timeout=30secs --executor_registration_timeout=5mins
--executor_shutdown_grace_period=90secs --gc_delay=1weeks
--hostname=mesos-slave-i-0fe036d7.gz-prod.us-west-2a.gearzero.us
--ip=10.92.22.241 --isolation=cgroups/cpu,cgroups/mem --logbufsecs=1
--recover=reconnect --strict=false --work_dir=/opt/mesos
--attributes=az:us-west-2a --resources=cpus:4;mem:16047;ports:[31000-32000]
root 27511 0.0 0.0 5916 596 ? S Mar11 0:00 \_ logger -p
user.info -t mesos-slave[27470]
root 27512 0.0 0.0 5916 1884 ? S Mar11 0:00 \_ logger -p
user.err -t mesos-slave[27470]
root 28907 0.1 0.0 802068 5360 ? Ssl Mar11 7:02 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.f552977a-040c-41a2-bb60-0e441c6491ef
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_metric-green.cac70614-e7d1-11e5-a617-02429957d388/runs/f552977a-040c-41a2-bb60-0e441c6491ef
--stop_timeout=30secs
root 29193 0.1 0.0 802596 5816 ? Ssl Mar11 7:02 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.5ada3858-b09b-4a5e-a320-b3c66bb237a6
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_push-green.cac70613-e7d1-11e5-a617-02429957d388/runs/5ada3858-b09b-4a5e-a320-b3c66bb237a6
--stop_timeout=30secs
root 29373 0.1 0.0 802596 5172 ? Ssl Mar11 7:00 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.2a93bbbb-0daa-4f22-bbb8-aa7a92791918
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_email-green.cac6b7f0-e7d1-11e5-a617-02429957d388/runs/2a93bbbb-0daa-4f22-bbb8-aa7a92791918
--stop_timeout=30secs
root 29538 0.1 0.0 802068 5768 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.c7aa0614-8afa-404a-a6a7-a591f3f20371
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_search-green.cac6df01-e7d1-11e5-a617-02429957d388/runs/c7aa0614-8afa-404a-a6a7-a591f3f20371
--stop_timeout=30secs
root 30831 0.1 0.0 802068 6012 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.2c086ae3-c0b9-4069-983a-d8efc37ff220
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_user-green.6c806415-e7d2-11e5-a617-02429957d388/runs/2c086ae3-c0b9-4069-983a-d8efc37ff220
--stop_timeout=30secs
root 30989 0.1 0.0 802068 5508 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.79d0c2bd-c3d5-4e47-a7de-74ce713dd6cf
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_identity-green.6d185e58-e7d2-11e5-a617-02429957d388/runs/79d0c2bd-c3d5-4e47-a7de-74ce713dd6cf
--stop_timeout=30secs
root 31132 0.1 0.0 802068 5612 ? Ssl Mar11 6:59 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.5e370455-97fc-476c-9cc4-c300c472a002
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_integration-green.6d185e57-e7d2-11e5-a617-02429957d388/runs/5e370455-97fc-476c-9cc4-c300c472a002
--stop_timeout=30secs
root 31292 0.1 0.0 802596 5688 ? Ssl Mar11 7:00 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.ff592e36-b9da-48e6-9d6c-960edba25050
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_media-green.6d183746-e7d2-11e5-a617-02429957d388/runs/ff592e36-b9da-48e6-9d6c-960edba25050
--stop_timeout=30secs
root 2292 0.1 0.0 802068 5388 ? Ssl Mar11 6:57 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.7caeae52-f0f3-43b9-b89a-fee798841757
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_chat-green.037a7cc1-e7d4-11e5-a617-02429957d388/runs/7caeae52-f0f3-43b9-b89a-fee798841757
--stop_timeout=30secs
root 2068 0.1 0.0 802068 5216 ? Ssl Mar12 6:31 \_
mesos-docker-executor
--container=mesos-29e183be-f611-41b4-824c-2d05b052231b-S3.9bd3e685-8ed1-442c-84bb-7e8c0a37acfe
--docker=docker --docker_socket=/var/run/docker.sock --help=false
--launcher_dir=/usr/libexec/mesos --mapped_directory=/mnt/mesos/sandbox
--sandbox_directory=/opt/mesos/slaves/29e183be-f611-41b4-824c-2d05b052231b-S3/frameworks/8ace1cd7-5a79-40f6-99cd-62c87ce2ef49-0001/executors/prod_talkk_notification-green.383537a4-e7fa-11e5-a617-02429957d388/runs/9bd3e685-8ed1-442c-84bb-7e8c0a37acfe
--stop_timeout=30secs
{noformat}
> /usr/libexec/mesos/mesos-health-check using/leaking a lot of memory
> -------------------------------------------------------------------
>
> Key: MESOS-4869
> URL: https://issues.apache.org/jira/browse/MESOS-4869
> Project: Mesos
> Issue Type: Bug
> Affects Versions: 0.27.1
> Reporter: Anthony Scalisi
> Priority: Critical
>
> We switched our health checks in Marathon from HTTP to COMMAND:
> {noformat}
> "healthChecks": [
> {
> "protocol": "COMMAND",
> "path": "/ops/ping",
> "command": { "value": "curl --silent -f -X GET
> http://$HOST:$PORT0/ops/ping > /dev/null" },
> "gracePeriodSeconds": 90,
> "intervalSeconds": 2,
> "portIndex": 0,
> "timeoutSeconds": 5,
> "maxConsecutiveFailures": 3
> }
> ]
> {noformat}
> All our applications have the same health check (and /ops/ping endpoint).
> Even though we have the issue on all our Mesos slaves, I'm going to focus on a
> particular one: *mesos-slave-i-e3a9c724*.
> The slave has 16 gigs of memory, with about 12 gigs allocated for 8 tasks:
> !https://i.imgur.com/gbRf804.png!
> Here is a *docker ps* on it:
> {noformat}
> root@mesos-slave-i-e3a9c724 # docker ps
> CONTAINER ID IMAGE COMMAND CREATED
> STATUS PORTS NAMES
> 4f7c0aa8d03a java:8 "/bin/sh -c 'JAVA_OPT" 6 hours ago
> Up 6 hours 0.0.0.0:31926->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.3dbb1004-5bb8-432f-8fd8-b863bd29341d
> 66f2fc8f8056 java:8 "/bin/sh -c 'JAVA_OPT" 6 hours ago
> Up 6 hours 0.0.0.0:31939->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.60972150-b2b1-45d8-8a55-d63e81b8372a
> f7382f241fce java:8 "/bin/sh -c 'JAVA_OPT" 6 hours ago
> Up 6 hours 0.0.0.0:31656->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.39731a2f-d29e-48d1-9927-34ab8c5f557d
> 880934c0049e java:8 "/bin/sh -c 'JAVA_OPT" 24 hours ago
> Up 24 hours 0.0.0.0:31371->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.23dfe408-ab8f-40be-bf6f-ce27fe885ee0
> 5eab1f8dac4a java:8 "/bin/sh -c 'JAVA_OPT" 46 hours ago
> Up 46 hours 0.0.0.0:31500->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.5ac75198-283f-4349-a220-9e9645b313e7
> b63740fe56e7 java:8 "/bin/sh -c 'JAVA_OPT" 46 hours ago
> Up 46 hours 0.0.0.0:31382->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.5d417f16-df24-49d5-a5b0-38a7966460fe
> 5c7a9ea77b0e java:8 "/bin/sh -c 'JAVA_OPT" 2 days ago
> Up 2 days 0.0.0.0:31186->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.b05043c5-44fc-40bf-aea2-10354e8f5ab4
> 53065e7a31ad java:8 "/bin/sh -c 'JAVA_OPT" 2 days ago
> Up 2 days 0.0.0.0:31839->8080/tcp
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.f0a3f4c5-ecdb-4f97-bede-d744feda670c
> {noformat}
> Here is a *docker stats* on it:
> {noformat}
> root@mesos-slave-i-e3a9c724 # docker stats
> CONTAINER CPU % MEM USAGE / LIMIT MEM %
> NET I/O BLOCK I/O
> 4f7c0aa8d03a 2.93% 797.3 MB / 1.611 GB 49.50%
> 1.277 GB / 1.189 GB 155.6 kB / 151.6 kB
> 53065e7a31ad 8.30% 738.9 MB / 1.611 GB 45.88%
> 419.6 MB / 554.3 MB 98.3 kB / 61.44 kB
> 5c7a9ea77b0e 4.91% 1.081 GB / 1.611 GB 67.10%
> 423 MB / 526.5 MB 3.219 MB / 61.44 kB
> 5eab1f8dac4a 3.13% 1.007 GB / 1.611 GB 62.53%
> 2.737 GB / 2.564 GB 6.566 MB / 118.8 kB
> 66f2fc8f8056 3.15% 768.1 MB / 1.611 GB 47.69%
> 258.5 MB / 252.8 MB 1.86 MB / 151.6 kB
> 880934c0049e 10.07% 735.1 MB / 1.611 GB 45.64%
> 1.451 GB / 1.399 GB 573.4 kB / 94.21 kB
> b63740fe56e7 12.04% 629 MB / 1.611 GB 39.06%
> 10.29 GB / 9.344 GB 8.102 MB / 61.44 kB
> f7382f241fce 6.21% 505 MB / 1.611 GB 31.36%
> 153.4 MB / 151.9 MB 5.837 MB / 94.21 kB
> {noformat}
> Not much else is running on the slave, yet the used memory doesn't map to the
> tasks' memory:
> {noformat}
> Mem:16047M used:13340M buffers:1139M cache:776M
> {noformat}
> If I exec into the container (*java:8* image), I can see the shell calls
> executing the curl specified in the health check as expected and exiting
> correctly.
> The only change we noticed since the memory usage woes was related to moving
> to Mesos doing the health checks instead, so I decided to take a look:
> {noformat}
> root@mesos-slave-i-e3a9c724 # ps awwx | grep health_check | grep -v grep
> 2504 ? Sl 47:33 /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:53432
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.f0a3f4c5-ecdb-4f97-bede-d744feda670c
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_email-green.b086206a-e000-11e5-a617-02429957d388
> 4220 ? Sl 47:26 /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:54982
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.b05043c5-44fc-40bf-aea2-10354e8f5ab4
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_chat-green.ed53ec41-e000-11e5-a617-02429957d388
> 7444 ? Sl 1:31 /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:59422
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.60972150-b2b1-45d8-8a55-d63e81b8372a
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_identity-green.aeb2ef3b-e219-11e5-a617-02429957d388
> 10368 ? Sl 1:30 /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:40981
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.3dbb1004-5bb8-432f-8fd8-b863bd29341d
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_channel-green.c6fbd2ac-e219-11e5-a617-02429957d388
> 12399 ? Sl 9:45 /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:44815
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.23dfe408-ab8f-40be-bf6f-ce27fe885ee0
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_integration-green.143865d5-e17d-11e5-a617-02429957d388
> 13538 ? Sl 24:54 /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:56598
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.5d417f16-df24-49d5-a5b0-38a7966460fe
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_metric-green.75296986-e0c7-11e5-a617-02429957d388
> 32034 ? Sl 1:31 /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:48119
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.39731a2f-d29e-48d1-9927-34ab8c5f557d
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_push-green.601337e6-e219-11e5-a617-02429957d388
> {noformat}
> The memory usage is really bad:
> {noformat}
> root@mesos-slave-i-e3a9c724 # ps -eo size,pid,user,command --sort -size |
> grep health_check | awk '{ hr=$1/1024 ; printf("%13.2f Mb ",hr) } { for ( x=4
> ; x<=NF ; x++ ) { printf("%s ",$x) } print "" }'
> 2185.39 Mb /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:53432
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.f0a3f4c5-ecdb-4f97-bede-d744feda670c
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_email-green.b086206a-e000-11e5-a617-02429957d388
> 2185.39 Mb /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:54982
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.b05043c5-44fc-40bf-aea2-10354e8f5ab4
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_chat-green.ed53ec41-e000-11e5-a617-02429957d388
> 1673.39 Mb /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:56598
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.5d417f16-df24-49d5-a5b0-38a7966460fe
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_metric-green.75296986-e0c7-11e5-a617-02429957d388
> 1161.39 Mb /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:44815
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.23dfe408-ab8f-40be-bf6f-ce27fe885ee0
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_integration-green.143865d5-e17d-11e5-a617-02429957d388
> 649.39 Mb /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:59422
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.60972150-b2b1-45d8-8a55-d63e81b8372a
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_identity-green.aeb2ef3b-e219-11e5-a617-02429957d388
> 649.39 Mb /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:40981
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.3dbb1004-5bb8-432f-8fd8-b863bd29341d
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_channel-green.c6fbd2ac-e219-11e5-a617-02429957d388
> 649.39 Mb /usr/libexec/mesos/mesos-health-check
> --executor=(1)@10.92.32.63:48119
> --health_check_json={"command":{"shell":true,"value":"docker exec
> mesos-29e183be-f611-41b4-824c-2d05b052231b-S6.39731a2f-d29e-48d1-9927-34ab8c5f557d
> sh -c \" curl --silent -f -X GET http:\/\/$HOST:$PORT0\/ops\/ping >
> \/dev\/null
> \""},"consecutive_failures":3,"delay_seconds":0.0,"grace_period_seconds":90.0,"interval_seconds":2.0,"timeout_seconds":5.0}
> --task_id=prod_talkk_push-green.601337e6-e219-11e5-a617-02429957d388
> 0.32 Mb grep --color=auto health_check
> {noformat}
> Killing the *mesos-health-check* process for each container fixes our memory
> issues (but I'm assuming health checks won't be reported anymore or
> something):
> {noformat}
> root@mesos-slave-i-e3a9c724 # date ; free -m ; ps awwx | grep health_check |
> grep -v grep | awk '{print $1}' | xargs -I% -P1 kill % ; date ; free -m
> Fri Mar 4 21:20:55 UTC 2016
> total used free shared buffers cached
> Mem: 16047 13538 2508 0 1140 774
> -/+ buffers/cache: 11623 4423
> Swap: 0 0 0
> Fri Mar 4 21:20:56 UTC 2016
> total used free shared buffers cached
> Mem: 16047 9101 6945 0 1140 774
> -/+ buffers/cache: 7186 8860
> Swap: 0 0 0
> {noformat}
> We're reverting to Marathon doing the health checks for now but would like to
> emphasize it's happening across all our slaves (not an isolated issue).
> Thanks for looking into it :)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)