We've been operating this cluster relatively incident-free since 0.86. On Monday I ran a yum update on one node, ceph00, and after rebooting we're seeing every OSD on that host stuck in the 'booting' state. I've tried removing all of the OSDs and recreating them with ceph-deploy, roughly the sequence sketched below (ceph-disk needed a modification to use partx -a rather than partprobe), but we see the same status, and I'm not sure how to troubleshoot this further.

One possibly relevant difference: the OSDs on this host now run as the ceph user, while the other three hosts still run theirs as root (although I followed the documented hammer-to-infernalis upgrade steps and did chown -R ceph:ceph /var/lib/ceph on each node).
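For reference, the per-disk remove/recreate sequence was along these lines (osd.0, sdb, and the "admin" host are illustrative; I've left out our separate journal partitions):

[root@ceph00 ceph]# ceph osd out 0
[root@ceph00 ceph]# systemctl stop ceph-osd@0
[root@ceph00 ceph]# ceph osd crush remove osd.0
[root@ceph00 ceph]# ceph auth del osd.0
[root@ceph00 ceph]# ceph osd rm 0
[root@admin ~]# ceph-deploy disk zap ceph00:sdb
[root@admin ~]# ceph-deploy osd create ceph00:sdb

The ceph-disk modification was just swapping the partprobe invocation in /usr/sbin/ceph-disk for partx, roughly this (quoting from memory, so the exact line may differ):

-    command_check_call(['partprobe', dev])
+    command_check_call(['partx', '-a', dev])

If it would help diagnosis, I can also turn up logging on one of the stuck OSDs via the admin socket and capture a boot attempt:

[root@ceph00 ceph]# ceph daemon osd.0 config set debug_osd 20
[root@ceph00 ceph]# ceph daemon osd.0 config set debug_ms 1

and then watch /var/log/ceph/ceph-osd.0.log.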
[root@ceph00 ceph]# lsb_release -idrc
Distributor ID: CentOS
Description:    CentOS Linux release 7.2.1511 (Core)
Release:        7.2.1511
Codename:       Core

[root@ceph00 ceph]# ceph --version
ceph version 9.2.0 (bb2ecea240f3a1d525bcb35670cb07bd1f0ca299)

[root@ceph00 ceph]# ceph daemon osd.0 status
{
    "cluster_fsid": "2e4ea2c0-fb62-41fa-b7b7-e34d759b851e",
    "osd_fsid": "ddf659ad-a3db-4094-b4d0-7d50f34b8f75",
    "whoami": 0,
    "state": "booting",
    "oldest_map": 25243,
    "newest_map": 26610,
    "num_pgs": 0
}

[root@ceph00 ceph]# ceph daemon osd.3 status
{
    "cluster_fsid": "2e4ea2c0-fb62-41fa-b7b7-e34d759b851e",
    "osd_fsid": "8b1acd8a-645d-4dc2-8c1d-6dbb1715265f",
    "whoami": 3,
    "state": "booting",
    "oldest_map": 25243,
    "newest_map": 26612,
    "num_pgs": 0
}

[root@ceph00 ceph]# ceph osd tree
ID  WEIGHT    TYPE NAME            UP/DOWN REWEIGHT PRIMARY-AFFINITY
-23   1.43999 root ssd
-19         0     host ceph00_ssd
-20   0.48000     host ceph01_ssd
 40   0.48000         osd.40            up  1.00000          1.00000
-21   0.48000     host ceph02_ssd
 43   0.48000         osd.43            up  1.00000          1.00000
-22   0.48000     host ceph03_ssd
 41   0.48000         osd.41            up  1.00000          1.00000
 -1 120.00000 root default
-17  80.00000     room b1
-14  40.00000         host ceph01
  1   4.00000             osd.1         up  1.00000          1.00000
  4   4.00000             osd.4         up  1.00000          1.00000
 18   4.00000             osd.18        up  1.00000          1.00000
 19   4.00000             osd.19        up  1.00000          1.00000
 20   4.00000             osd.20        up  1.00000          1.00000
 21   4.00000             osd.21        up  1.00000          1.00000
 22   4.00000             osd.22        up  1.00000          1.00000
 23   4.00000             osd.23        up  1.00000          1.00000
 24   4.00000             osd.24        up  1.00000          1.00000
 25   4.00000             osd.25        up  1.00000          1.00000
-16  40.00000         host ceph03
 30   4.00000             osd.30        up  1.00000          1.00000
 31   4.00000             osd.31        up  1.00000          1.00000
 32   4.00000             osd.32        up  1.00000          1.00000
 33   4.00000             osd.33        up  1.00000          1.00000
 34   4.00000             osd.34        up  1.00000          1.00000
 35   4.00000             osd.35        up  1.00000          1.00000
 36   4.00000             osd.36        up  1.00000          1.00000
 37   4.00000             osd.37        up  1.00000          1.00000
 38   4.00000             osd.38        up  1.00000          1.00000
 39   4.00000             osd.39        up  1.00000          1.00000
-18  40.00000     room b2
-13         0         host ceph00
-15  40.00000         host ceph02
  2   4.00000             osd.2         up  1.00000          1.00000
  5   4.00000             osd.5         up  1.00000          1.00000
 14   4.00000             osd.14        up  1.00000          1.00000
 15   4.00000             osd.15        up  1.00000          1.00000
 16   4.00000             osd.16        up  1.00000          1.00000
 17   4.00000             osd.17        up  1.00000          1.00000
 26   4.00000             osd.26        up  1.00000          1.00000
 27   4.00000             osd.27        up  1.00000          1.00000
 28   4.00000             osd.28        up  1.00000          1.00000
 29   4.00000             osd.29        up  1.00000          1.00000
  0         0 osd.0                   down        0          1.00000
  3         0 osd.3                   down        0          1.00000
  6         0 osd.6                   down        0          1.00000
  7         0 osd.7                   down        0          1.00000
  8         0 osd.8                   down        0          1.00000
  9         0 osd.9                   down        0          1.00000
 10         0 osd.10                  down        0          1.00000
 11         0 osd.11                  down        0          1.00000
 12         0 osd.12                  down        0          1.00000
 13         0 osd.13                  down        0          1.00000

Any assistance is greatly appreciated.

Bob