Hello! I have a cluster of 2 nodes with 3 OSDs each. The cluster is about 80% full.
df -H
/dev/sdc1 27G 24G 3.9G 86% /var/lib/ceph/osd/ceph-1
/dev/sdd1 27G 20G 6.9G 75% /var/lib/ceph/osd/ceph-2
/dev/sdb1 27G 24G 3.5G 88% /var/lib/ceph/osd/ceph-0
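For reference, usage can also be checked from the Ceph side. A minimal sketch, assuming the default nearfull/full ratios of 0.85 and 0.95 (I have not changed them):

# cluster-wide and per-pool usage
ceph df
# detailed health output, including which OSDs are flagged near full
ceph health detail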
When I switch off one server, after about 10 minutes the PGs begin to be remapped:
ceph -w
2016-05-24 09:37:00.065134 mon.0 [INF] pgmap v4850: 704 pgs: 469
active+degraded, 186 active+remapped, 26 active+recovery_wait+remapped, 23
active+recovering+remapped; 56038 MB data, 65904 MB used, 10854 MB / 76759 MB
avail; 11787/28038 objects degraded (42.039%); 6438 kB/s, 1 objects/s recovering
2016-05-24 09:37:03.889165 mon.0 [INF] pgmap v4851: 704 pgs: 469
active+degraded, 186 active+remapped, 26 active+recovery_wait+remapped, 23
active+recovering+remapped; 56038 MB data, 65928 MB used, 10830 MB / 76759 MB
avail; 11786/28038 objects degraded (42.036%); 5753 kB/s, 1 objects/s recovering
2016-05-24 09:37:08.027062 osd.0 [WRN] OSD near full (90%)
2016-05-24 09:37:09.713479 mon.0 [INF] pgmap v4853: 704 pgs: 469
active+degraded, 186 active+remapped, 26 active+recovery_wait+remapped, 23
active+recovering+remapped; 56038 MB data, 65556 MB used, 11203 MB / 76759 MB
avail; 11776/28038 objects degraded (42.000%); 7158 kB/s, 1 objects/s recovering
2016-05-24 09:37:16.232069 mon.0 [INF] pgmap v4854: 704 pgs: 469
active+degraded, 186 active+remapped, 26 active+recovery_wait+remapped, 23
active+recovering+remapped; 56038 MB data, 65626 MB used, 11133 MB / 76759 MB
avail; 11773/28038 objects degraded (41.989%); 5273 kB/s, 1 objects/s recovering
As a result, one disk overflows and the cluster goes down. Why does Ceph remap the PGs? I expected it to simply mark all PGs as active+degraded while the second node is down.
ceph version 0.80.11
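My guess (not verified) is that the ~10 minute delay matches the mon_osd_down_out_interval timeout: once the down OSDs are marked out, CRUSH recomputes placement and the surviving OSDs start backfilling, which is what fills up the remaining disks. A minimal sketch of how I would suppress that before taking a node down on purpose, assuming the standard flag/option names:

# prevent the monitors from marking down OSDs out (and thus from remapping)
ceph osd set noout
# ... take the node down, do the maintenance, bring it back ...
ceph osd unset noout

# alternatively, in ceph.conf under [mon] (0 should disable automatic marking out):
# mon osd down out interval = 0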
root@ceph1-node:~# cat /etc/ceph/ceph.conf
[global]
fsid = b66c7daa-d6d8-46c7-9e61-15adbb749ed7
mon_initial_members = ceph1-node, ceph2-node, ceph-mon2
mon_host = 192.168.241.97,192.168.241.110,192.168.241.123
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
filestore_xattr_use_omap = true
osd_pool_default_size = 2
osd_pool_default_min_size = 1
mon_clock_drift_allowed = 2
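For completeness, the effective pool settings can be confirmed directly on the cluster (a sketch; 'rbd' as the pool name is an assumption on my part):

ceph osd pool get rbd size       # expect: 2
ceph osd pool get rbd min_size   # expect: 1
ceph osd dump | grep 'replicated size'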
root@ceph1-node:~# cat crush-map.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable straw_calc_version 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host ceph1-node {
id -2 # do not change unnecessarily
# weight 0.060
alg straw
hash 0 # rjenkins1
item osd.0 weight 0.020
item osd.1 weight 0.020
item osd.2 weight 0.020
}
host ceph2-node {
id -3 # do not change unnecessarily
# weight 0.060
alg straw
hash 0 # rjenkins1
item osd.3 weight 0.020
item osd.4 weight 0.020
item osd.5 weight 0.020
}
root default {
id -1 # do not change unnecessarily
# weight 0.120
alg straw
hash 0 # rjenkins1
item ceph1-node weight 0.060
item ceph2-node weight 0.060
}
host ceph1-node2 {
id -4 # do not change unnecessarily
# weight 3.000
alg straw
hash 0 # rjenkins1
item osd.0 weight 1.000
item osd.1 weight 1.000
item osd.2 weight 1.000
}
# rules
rule replicated_ruleset {
ruleset 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
# end crush map
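In case it matters, this is roughly how the map above was dumped and how I would push back an edited version (the file names are just placeholders):

ceph osd getcrushmap -o crush.bin          # fetch the compiled map from the cluster
crushtool -d crush.bin -o crush-map.txt    # decompile to the text shown above
# after editing crush-map.txt, recompile and re-inject it:
crushtool -c crush-map.txt -o crush-new.bin
ceph osd setcrushmap -i crush-new.bin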
