Actually I think I misread what this was doing, sorry. Can you do a “ceph osd tree”? It’s hard to see the structure via the text dumps.
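For reference, one way to get the structure in a readable form (a sketch assuming the stock ceph and crushtool CLIs; crush.bin and crush.txt are just placeholder file names):

    ceph osd tree                        # indented summary of the CRUSH hierarchy
    ceph osd getcrushmap -o crush.bin    # export the compiled CRUSH map
    crushtool -d crush.bin -o crush.txt  # decompile it to plain text

The decompiled crush.txt corresponds to the bucket/rule dump quoted further down in the thread.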
On Wed, Feb 13, 2019 at 10:49 AM Gregory Farnum <gfar...@redhat.com> wrote:
> Your CRUSH rule for EC pools is forcing that behavior with the line
>
>     step chooseleaf indep 1 type ctnr
>
> If you want different behavior, you'll need a different CRUSH rule.
>
> On Tue, Feb 12, 2019 at 5:18 PM hnuzhoulin2 <hnuzhoul...@gmail.com> wrote:
>
>> Hi, cephers,
>>
>> I am building a Ceph EC cluster. When a disk fails, I mark its OSD out,
>> but all of its PGs remap to OSDs on the same host; I think they should
>> remap to other hosts in the same rack. The test process is:
>>
>>     ceph osd pool create .rgw.buckets.data 8192 8192 erasure ISA-4-2 site1_sata_erasure_ruleset 400000000
>>     ceph osd df tree | awk '{print $1" "$2" "$3" "$9" "$10}' > /tmp/1
>>     /etc/init.d/ceph stop osd.2
>>     ceph osd out 2
>>     ceph osd df tree | awk '{print $1" "$2" "$3" "$9" "$10}' > /tmp/2
>>     diff /tmp/1 /tmp/2 -y --suppress-common-lines
>>
>>     0 1.00000 1.00000 118 osd.0      |   0 1.00000 1.00000 126 osd.0
>>     1 1.00000 1.00000 123 osd.1      |   1 1.00000 1.00000 139 osd.1
>>     2 1.00000 1.00000 122 osd.2      |   2 1.00000 0       0   osd.2
>>     3 1.00000 1.00000 113 osd.3      |   3 1.00000 1.00000 131 osd.3
>>     4 1.00000 1.00000 122 osd.4      |   4 1.00000 1.00000 136 osd.4
>>     5 1.00000 1.00000 112 osd.5      |   5 1.00000 1.00000 127 osd.5
>>     6 1.00000 1.00000 114 osd.6      |   6 1.00000 1.00000 128 osd.6
>>     7 1.00000 1.00000 124 osd.7      |   7 1.00000 1.00000 136 osd.7
>>     8 1.00000 1.00000 95  osd.8      |   8 1.00000 1.00000 113 osd.8
>>     9 1.00000 1.00000 112 osd.9      |   9 1.00000 1.00000 119 osd.9
>>     TOTAL 3073T 197G                 |   TOTAL 3065T 197G
>>     MIN/MAX VAR: 0.84/26.56          |   MIN/MAX VAR: 0.84/26.52
>>
>> (All ten OSDs whose PG counts changed are on the same host as osd.2.)
>>
>> Some config info (full configs:
>> https://gist.github.com/hnuzhoulin/575883dbbcb04dff448eea3b9384c125):
>> jewel 10.2.11, filestore + rocksdb
>>
>>     ceph osd erasure-code-profile get ISA-4-2
>>     k=4
>>     m=2
>>     plugin=isa
>>     ruleset-failure-domain=ctnr
>>     ruleset-root=site1-sata
>>     technique=reed_sol_van
>>
>> Part of ceph.conf:
>>
>> [global]
>> fsid = 1CAB340D-E551-474F-B21A-399AC0F10900
>> auth cluster required = cephx
>> auth service required = cephx
>> auth client required = cephx
>> pid file = /home/ceph/var/run/$name.pid
>> log file = /home/ceph/log/$cluster-$name.log
>> mon osd nearfull ratio = 0.85
>> mon osd full ratio = 0.95
>> admin socket = /home/ceph/var/run/$cluster-$name.asok
>> osd pool default size = 3
>> osd pool default min size = 1
>> osd objectstore = filestore
>> filestore merge threshold = -10
>>
>> [mon]
>> keyring = /home/ceph/var/lib/$type/$cluster-$id/keyring
>> mon data = /home/ceph/var/lib/$type/$cluster-$id
>> mon cluster log file = /home/ceph/log/$cluster.log
>>
>> [osd]
>> keyring = /home/ceph/var/lib/$type/$cluster-$id/keyring
>> osd data = /home/ceph/var/lib/$type/$cluster-$id
>> osd journal = /home/ceph/var/lib/$type/$cluster-$id/journal
>> osd journal size = 10000
>> osd mkfs type = xfs
>> osd mount options xfs = rw,noatime,nodiratime,inode64,logbsize=256k
>> osd backfill full ratio = 0.92
>> osd failsafe full ratio = 0.95
>> osd failsafe nearfull ratio = 0.85
>> osd max backfills = 1
>> osd crush update on start = false
>> osd op thread timeout = 60
>> filestore split multiple = 8
>> filestore max sync interval = 15
>> filestore min sync interval = 5
>>
>> [osd.0]
>> host = cld-osd1-56
>> addr = XXXXX
>> user = ceph
>> devs = /disk/link/osd-0/data
>> osd journal = /disk/link/osd-0/journal
>> ...
>> [osd.503]
>> host = cld-osd42-56
>> addr = 10.108.87.52
>> user = ceph
>> devs = /disk/link/osd-503/data
>> osd journal = /disk/link/osd-503/journal
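The stop/out/diff test above can also be simulated offline with crushtool, without touching a live cluster. A minimal sketch, assuming the compiled CRUSH map has been exported as crush.bin (as in the earlier sketch), that rule 0 is site1_sata_erasure_ruleset, and that --num-rep 6 matches k+m of the ISA-4-2 profile:

    crushtool --test -i crush.bin --rule 0 --num-rep 6 --show-mappings > /tmp/before
    # pretend osd.2 is out by giving it weight 0 in the simulation only
    crushtool --test -i crush.bin --rule 0 --num-rep 6 --weight 2 0 --show-mappings > /tmp/after
    diff /tmp/before /tmp/after

Each differing line is a PG whose mapping moved, which shows directly where the shards that were on osd.2 end up.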
>>
>> The crushmap is below:
>>
>> # begin crush map
>> tunable choose_local_tries 0
>> tunable choose_local_fallback_tries 0
>> tunable choose_total_tries 50
>> tunable chooseleaf_descend_once 1
>> tunable chooseleaf_vary_r 1
>> tunable straw_calc_version 1
>> tunable allowed_bucket_algs 54
>>
>> # devices
>> device 0 osd.0
>> device 1 osd.1
>> device 2 osd.2
>> ...
>> device 502 osd.502
>> device 503 osd.503
>>
>> # types
>> type 0 osd          # osd
>> type 1 ctnr         # sata/ssd group by node, -101~1xx/-201~2xx
>> type 2 media        # sata/ssd group by rack, -11~1x/-21~2x
>> type 3 mediagroup   # sata/ssd group by site, -5/-6
>> type 4 unit         # site, -2
>> type 5 root         # root, -1
>>
>> # buckets
>> ctnr cld-osd1-56-sata {
>>     id -101    # do not change unnecessarily
>>     # weight 10.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item osd.0 weight 1.000
>>     item osd.1 weight 1.000
>>     item osd.2 weight 1.000
>>     item osd.3 weight 1.000
>>     item osd.4 weight 1.000
>>     item osd.5 weight 1.000
>>     item osd.6 weight 1.000
>>     item osd.7 weight 1.000
>>     item osd.8 weight 1.000
>>     item osd.9 weight 1.000
>> }
>> ctnr cld-osd1-56-ssd {
>>     id -201    # do not change unnecessarily
>>     # weight 2.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item osd.10 weight 1.000
>>     item osd.11 weight 1.000
>> }
>> ...
>> ctnr cld-osd41-56-sata {
>>     id -141    # do not change unnecessarily
>>     # weight 10.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item osd.480 weight 1.000
>>     item osd.481 weight 1.000
>>     item osd.482 weight 1.000
>>     item osd.483 weight 1.000
>>     item osd.484 weight 1.000
>>     item osd.485 weight 1.000
>>     item osd.486 weight 1.000
>>     item osd.487 weight 1.000
>>     item osd.488 weight 1.000
>>     item osd.489 weight 1.000
>> }
>> ctnr cld-osd41-56-ssd {
>>     id -241    # do not change unnecessarily
>>     # weight 2.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item osd.490 weight 1.000
>>     item osd.491 weight 1.000
>> }
>> ctnr cld-osd42-56-sata {
>>     id -142    # do not change unnecessarily
>>     # weight 10.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item osd.492 weight 1.000
>>     item osd.493 weight 1.000
>>     item osd.494 weight 1.000
>>     item osd.495 weight 1.000
>>     item osd.496 weight 1.000
>>     item osd.497 weight 1.000
>>     item osd.498 weight 1.000
>>     item osd.499 weight 1.000
>>     item osd.500 weight 1.000
>>     item osd.501 weight 1.000
>> }
>>
>> media site1-rack1-sata {
>>     id -11    # do not change unnecessarily
>>     # weight 70.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd1-56-sata weight 10.000
>>     item cld-osd2-56-sata weight 10.000
>>     item cld-osd3-56-sata weight 10.000
>>     item cld-osd4-56-sata weight 10.000
>>     item cld-osd5-56-sata weight 10.000
>>     item cld-osd6-56-sata weight 10.000
>>     item cld-osd7-56-sata weight 10.000
>> }
>> media site1-rack2-sata {
>>     id -12    # do not change unnecessarily
>>     # weight 70.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd8-56-sata weight 10.000
>>     item cld-osd9-56-sata weight 10.000
>>     item cld-osd10-56-sata weight 10.000
>>     item cld-osd11-56-sata weight 10.000
>>     item cld-osd12-56-sata weight 10.000
>>     item cld-osd13-56-sata weight 10.000
>>     item cld-osd14-56-sata weight 10.000
>> }
>> media site1-rack3-sata {
>>     id -13    # do not change unnecessarily
>>     # weight 70.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd15-56-sata weight 10.000
>>     item cld-osd16-56-sata weight 10.000
>>     item cld-osd17-56-sata weight 10.000
>>     item cld-osd18-56-sata weight 10.000
>>     item cld-osd19-56-sata weight 10.000
>>     item cld-osd20-56-sata weight 10.000
>>     item cld-osd21-56-sata weight 10.000
>> }
>> media site1-rack4-sata {
>>     id -14    # do not change unnecessarily
>>     # weight 70.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd22-56-sata weight 10.000
>>     item cld-osd23-56-sata weight 10.000
>>     item cld-osd24-56-sata weight 10.000
>>     item cld-osd25-56-sata weight 10.000
>>     item cld-osd26-56-sata weight 10.000
>>     item cld-osd27-56-sata weight 10.000
>>     item cld-osd28-56-sata weight 10.000
>> }
>> media site1-rack5-sata {
>>     id -15    # do not change unnecessarily
>>     # weight 70.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd29-56-sata weight 10.000
>>     item cld-osd30-56-sata weight 10.000
>>     item cld-osd31-56-sata weight 10.000
>>     item cld-osd32-56-sata weight 10.000
>>     item cld-osd33-56-sata weight 10.000
>>     item cld-osd34-56-sata weight 10.000
>>     item cld-osd35-56-sata weight 10.000
>> }
>> media site1-rack6-sata {
>>     id -16    # do not change unnecessarily
>>     # weight 70.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd36-56-sata weight 10.000
>>     item cld-osd37-56-sata weight 10.000
>>     item cld-osd38-56-sata weight 10.000
>>     item cld-osd39-56-sata weight 10.000
>>     item cld-osd40-56-sata weight 10.000
>>     item cld-osd41-56-sata weight 10.000
>>     item cld-osd42-56-sata weight 10.000
>> }
>>
>> media site1-rack1-ssd {
>>     id -21    # do not change unnecessarily
>>     # weight 14.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd1-56-ssd weight 2.000
>>     item cld-osd2-56-ssd weight 2.000
>>     item cld-osd3-56-ssd weight 2.000
>>     item cld-osd4-56-ssd weight 2.000
>>     item cld-osd5-56-ssd weight 2.000
>>     item cld-osd6-56-ssd weight 2.000
>>     item cld-osd7-56-ssd weight 2.000
>>     item cld-osd8-56-ssd weight 2.000
>>     item cld-osd9-56-ssd weight 2.000
>>     item cld-osd10-56-ssd weight 2.000
>>     item cld-osd11-56-ssd weight 2.000
>>     item cld-osd12-56-ssd weight 2.000
>>     item cld-osd13-56-ssd weight 2.000
>>     item cld-osd14-56-ssd weight 2.000
>> }
>> media site1-rack2-ssd {
>>     id -22    # do not change unnecessarily
>>     # weight 14.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd15-56-ssd weight 2.000
>>     item cld-osd16-56-ssd weight 2.000
>>     item cld-osd17-56-ssd weight 2.000
>>     item cld-osd18-56-ssd weight 2.000
>>     item cld-osd19-56-ssd weight 2.000
>>     item cld-osd20-56-ssd weight 2.000
>>     item cld-osd21-56-ssd weight 2.000
>>     item cld-osd22-56-ssd weight 2.000
>>     item cld-osd23-56-ssd weight 2.000
>>     item cld-osd24-56-ssd weight 2.000
>>     item cld-osd25-56-ssd weight 2.000
>>     item cld-osd26-56-ssd weight 2.000
>>     item cld-osd27-56-ssd weight 2.000
>>     item cld-osd28-56-ssd weight 2.000
>> }
>> media site1-rack3-ssd {
>>     id -23    # do not change unnecessarily
>>     # weight 14.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item cld-osd29-56-ssd weight 2.000
>>     item cld-osd30-56-ssd weight 2.000
>>     item cld-osd31-56-ssd weight 2.000
>>     item cld-osd32-56-ssd weight 2.000
>>     item cld-osd33-56-ssd weight 2.000
>>     item cld-osd34-56-ssd weight 2.000
>>     item cld-osd35-56-ssd weight 2.000
>>     item cld-osd36-56-ssd weight 2.000
>>     item cld-osd37-56-ssd weight 2.000
>>     item cld-osd38-56-ssd weight 2.000
>>     item cld-osd39-56-ssd weight 2.000
>>     item cld-osd40-56-ssd weight 2.000
>>     item cld-osd41-56-ssd weight 2.000
>>     item cld-osd42-56-ssd weight 2.000
>> }
>> mediagroup site1-sata {
>>     id -5    # do not change unnecessarily
>>     # weight 420.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item site1-rack1-sata weight 70.000
>>     item site1-rack2-sata weight 70.000
>>     item site1-rack3-sata weight 70.000
>>     item site1-rack4-sata weight 70.000
>>     item site1-rack5-sata weight 70.000
>>     item site1-rack6-sata weight 70.000
>> }
>> mediagroup site1-ssd {
>>     id -6    # do not change unnecessarily
>>     # weight 84.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item site1-rack1-ssd weight 28.000
>>     item site1-rack2-ssd weight 28.000
>>     item site1-rack3-ssd weight 28.000
>> }
>>
>> unit site1 {
>>     id -2    # do not change unnecessarily
>>     # weight 504.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item site1-sata weight 420.000
>>     item site1-ssd weight 84.000
>> }
>>
>> root default {
>>     id -1    # do not change unnecessarily
>>     # weight 504.000
>>     alg straw2
>>     hash 0    # rjenkins1
>>     item site1 weight 504.000
>> }
>>
>> # rules
>> rule site1_sata_erasure_ruleset {
>>     ruleset 0
>>     type erasure
>>     min_size 3
>>     max_size 6
>>     step set_chooseleaf_tries 5
>>     step set_choose_tries 100
>>     step take site1-sata
>>     step choose indep 0 type media
>>     step chooseleaf indep 1 type ctnr
>>     step emit
>> }
>> rule site1_ssd_replicated_ruleset {
>>     ruleset 1
>>     type replicated
>>     min_size 1
>>     max_size 10
>>     step take site1-ssd
>>     step choose firstn 0 type media
>>     step chooseleaf firstn 1 type ctnr
>>     step emit
>> }
>> # end crush map
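To check how rule 0 actually spreads shards across this hierarchy, crushtool can also report per-OSD utilization and any mappings it fails to complete. A sketch, under the same crush.bin assumption as above:

    crushtool --test -i crush.bin --rule 0 --num-rep 6 --show-utilization
    crushtool --test -i crush.bin --rule 0 --num-rep 6 --show-bad-mappings

With "step choose indep 0 type media" selecting all six sata racks and "step chooseleaf indep 1 type ctnr" then taking one host (and one OSD on it) in each, every PG should end up with six shards on six different racks, so the 4+2 profile can tolerate the loss of a whole rack.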
_______________________________________________ ceph-users mailing list ceph-users@lists.ceph.com http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com