I just built a small lab cluster: 1 MON node, 3 OSD nodes (each with 3 Ceph
disks and 1 OS/journal disk), an admin VM, and 3 client VMs.
I followed the preflight and install instructions, and when I finished adding
the OSDs I ran ceph status and got the following:
ceph> status
    cluster b4419183-5320-4701-aae2-eb61e186b443
     health HEALTH_WARN
            32 pgs degraded
            64 pgs stale
            32 pgs stuck degraded
            246 pgs stuck inactive
            64 pgs stuck stale
            310 pgs stuck unclean
            32 pgs stuck undersized
            32 pgs undersized
            pool rbd pg_num 310 > pgp_num 64
     monmap e1: 1 mons at {mon=172.17.1.16:6789/0}
            election epoch 2, quorum 0 mon
     osdmap e49: 11 osds: 9 up, 9 in
      pgmap v122: 310 pgs, 1 pools, 0 bytes data, 0 objects
            298 MB used, 4189 GB / 4189 GB avail
                 246 creating
                  32 stale+active+undersized+degraded
                  32 stale+active+remapped
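
(If a PG-level view helps, I believe I can also dump the stuck PGs and check
where a sample PG maps, along these lines -- 0.1 below is just an example PG
ID from the rbd pool, not one I've confirmed is stuck:)

[root@admin test-cluster]# ceph pg dump_stuck inactive
[root@admin test-cluster]# ceph pg map 0.1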
ceph> health
HEALTH_WARN 32 pgs degraded; 64 pgs stale; 32 pgs stuck degraded; 246 pgs stuck
inactive; 64 pgs stuck stale; 310 pgs stuck unclean; 32 pgs stuck undersized;
32 pgs undersized; pool rbd pg_num 310 > pgp_num 64
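
One warning I think I understand: pg_num 310 > pgp_num 64 on the rbd pool.
My reading of the docs is that pgp_num should be raised to match pg_num with
something like the following, though I've held off on running it because I'm
not sure it explains the stuck/stale PGs:

[root@admin test-cluster]# ceph osd pool set rbd pgp_num 310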
ceph> quorum_status
{"election_epoch":2,"quorum":[0],"quorum_names":["mon"],"quorum_leader_name":"mon","monmap":{"epoch":1,"fsid":"b4419183-5320-4701-aae2-eb61e186b443","modified":"0.000000","created":"0.000000","mons":[{"rank":0,"name":"mon","addr":"172.17.1.16:6789\/0"}]}}
ceph> mon_status
{"name":"mon","rank":0,"state":"leader","election_epoch":2,"quorum":[0],"outside_quorum":[],"extra_probe_peers":[],"sync_provider":[],"monmap":{"epoch":1,"fsid":"b4419183-5320-4701-aae2-eb61e186b443","modified":"0.000000","created":"0.000000","mons":[{"rank":0,"name":"mon","addr":"172.17.1.16:6789\/0"}]}}
ceph> osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 4.94997 root default
-2 2.24998     host osd1
 0 0.45000         osd.0     down        0          1.00000
 1 0.45000         osd.1     down        0          1.00000
 2 0.45000         osd.2       up  1.00000          1.00000
 3 0.45000         osd.3       up  1.00000          1.00000
10 0.45000         osd.10      up  1.00000          1.00000
-3 1.34999     host osd2
 4 0.45000         osd.4       up  1.00000          1.00000
 5 0.45000         osd.5       up  1.00000          1.00000
 6 0.45000         osd.6       up  1.00000          1.00000
-4 1.34999     host osd3
 7 0.45000         osd.7       up  1.00000          1.00000
 8 0.45000         osd.8       up  1.00000          1.00000
 9 0.45000         osd.9       up  1.00000          1.00000
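
Two things jump out at me from that tree: osd.0 and osd.1 are down, and osd1
is carrying five OSDs (0, 1, 2, 3, 10) rather than the three disks per node I
described. If it helps, this is roughly what I was planning to run on osd1 to
check the down daemons (service names per the standard packages; on a
systemd-managed install I believe the equivalent is systemctl status
ceph-osd@0):

[root@osd1 ~]# service ceph status osd.0
[root@osd1 ~]# service ceph status osd.1
[root@osd1 ~]# tail -n 50 /var/log/ceph/ceph-osd.0.log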
Admin node:
[root@admin test-cluster]# cat ceph.conf
[global]
auth_service_required = cephx
filestore_xattr_use_omap = true
auth_client_required = cephx
auth_cluster_required = cephx
mon_host = 172.17.1.16
mon_initial_members = mon
fsid = b4419183-5320-4701-aae2-eb61e186b443
osd pool default size = 2
public network = 172.17.1.0/24
cluster network = 10.0.0.0/24
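
For what it's worth, I set osd pool default size = 2 expecting two replicas
to fit across three hosts. If useful, I can confirm the live pool settings
and cluster-network reachability with something like this (10.0.0.x is a
placeholder for each OSD node's cluster-network IP):

[root@admin test-cluster]# ceph osd pool get rbd size
[root@admin test-cluster]# ceph osd pool get rbd min_size
[root@osd1 ~]# ping -c 3 10.0.0.x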
How do I diagnose and resolve these cluster health issues? Do you need any
additional information to help with the diagnosis?
Thanks!!
Dave