Your CRUSH rule for EC pools is forcing that behavior with the line `step chooseleaf indep 1 type ctnr`.
If you want different behavior, you’ll need a different crush rule. On Tue, Feb 12, 2019 at 5:18 PM hnuzhoulin2 <[email protected]> wrote: > Hi, cephers > > > I am building a ceph EC cluster.when a disk is error,I out it.But its all > PGs remap to the osds in the same host,which I think they should remap to > other hosts in the same rack. > test process is: > > ceph osd pool create .rgw.buckets.data 8192 8192 erasure ISA-4-2 > site1_sata_erasure_ruleset 400000000 > ceph osd df tree|awk '{print $1" "$2" "$3" "$9" "$10}'> /tmp/1 > /etc/init.d/ceph stop osd.2 > ceph osd out 2 > ceph osd df tree|awk '{print $1" "$2" "$3" "$9" "$10}'> /tmp/2 > diff /tmp/1 /tmp/2 -y --suppress-common-lines > > 0 1.00000 1.00000 118 osd.0 | 0 1.00000 1.00000 126 osd.0 > 1 1.00000 1.00000 123 osd.1 | 1 1.00000 1.00000 139 osd.1 > 2 1.00000 1.00000 122 osd.2 | 2 1.00000 0 0 osd.2 > 3 1.00000 1.00000 113 osd.3 | 3 1.00000 1.00000 131 osd.3 > 4 1.00000 1.00000 122 osd.4 | 4 1.00000 1.00000 136 osd.4 > 5 1.00000 1.00000 112 osd.5 | 5 1.00000 1.00000 127 osd.5 > 6 1.00000 1.00000 114 osd.6 | 6 1.00000 1.00000 128 osd.6 > 7 1.00000 1.00000 124 osd.7 | 7 1.00000 1.00000 136 osd.7 > 8 1.00000 1.00000 95 osd.8 | 8 1.00000 1.00000 113 osd.8 > 9 1.00000 1.00000 112 osd.9 | 9 1.00000 1.00000 119 osd.9 > TOTAL 3073T 197G | TOTAL 3065T 197G > MIN/MAX VAR: 0.84/26.56 | MIN/MAX VAR: 0.84/26.52 > > > some config info: (detail configs see: > https://gist.github.com/hnuzhoulin/575883dbbcb04dff448eea3b9384c125) > jewel 10.2.11 filestore+rocksdb > > ceph osd erasure-code-profile get ISA-4-2 > k=4 > m=2 > plugin=isa > ruleset-failure-domain=ctnr > ruleset-root=site1-sata > technique=reed_sol_van > > part of ceph.conf is: > > [global] > fsid = 1CAB340D-E551-474F-B21A-399AC0F10900 > auth cluster required = cephx > auth service required = cephx > auth client required = cephx > pid file = /home/ceph/var/run/$name.pid > log file = /home/ceph/log/$cluster-$name.log > mon osd nearfull ratio = 0.85 > mon osd full ratio 
= 0.95 > admin socket = /home/ceph/var/run/$cluster-$name.asok > osd pool default size = 3 > osd pool default min size = 1 > osd objectstore = filestore > filestore merge threshold = -10 > > [mon] > keyring = /home/ceph/var/lib/$type/$cluster-$id/keyring > mon data = /home/ceph/var/lib/$type/$cluster-$id > mon cluster log file = /home/ceph/log/$cluster.log > [osd] > keyring = /home/ceph/var/lib/$type/$cluster-$id/keyring > osd data = /home/ceph/var/lib/$type/$cluster-$id > osd journal = /home/ceph/var/lib/$type/$cluster-$id/journal > osd journal size = 10000 > osd mkfs type = xfs > osd mount options xfs = rw,noatime,nodiratime,inode64,logbsize=256k > osd backfill full ratio = 0.92 > osd failsafe full ratio = 0.95 > osd failsafe nearfull ratio = 0.85 > osd max backfills = 1 > osd crush update on start = false > osd op thread timeout = 60 > filestore split multiple = 8 > filestore max sync interval = 15 > filestore min sync interval = 5 > [osd.0] > host = cld-osd1-56 > addr = XXXXX > user = ceph > devs = /disk/link/osd-0/data > osd journal = /disk/link/osd-0/journal > ……. 
> [osd.503] > host = cld-osd42-56 > addr = 10.108.87.52 > user = ceph > devs = /disk/link/osd-503/data > osd journal = /disk/link/osd-503/journal > > > crushmap is below: > > # begin crush map > tunable choose_local_tries 0 > tunable choose_local_fallback_tries 0 > tunable choose_total_tries 50 > tunable chooseleaf_descend_once 1 > tunable chooseleaf_vary_r 1 > tunable straw_calc_version 1 > tunable allowed_bucket_algs 54 > > # devices > device 0 osd.0 > device 1 osd.1 > device 2 osd.2 > 。。。 > device 502 osd.502 > device 503 osd.503 > > # types > type 0 osd # osd > type 1 ctnr # sata/ssd group by node, -101~1xx/-201~2xx > type 2 media # sata/ssd group by rack, -11~1x/-21~2x > type 3 mediagroup # sata/ssd group by site, -5/-6 > type 4 unit # site, -2 > type 5 root # root, -1 > > # buckets > ctnr cld-osd1-56-sata { > id -101 # do not change unnecessarily > # weight 10.000 > alg straw2 > hash 0 # rjenkins1 > item osd.0 weight 1.000 > item osd.1 weight 1.000 > item osd.2 weight 1.000 > item osd.3 weight 1.000 > item osd.4 weight 1.000 > item osd.5 weight 1.000 > item osd.6 weight 1.000 > item osd.7 weight 1.000 > item osd.8 weight 1.000 > item osd.9 weight 1.000 > } > ctnr cld-osd1-56-ssd { > id -201 # do not change unnecessarily > # weight 2.000 > alg straw2 > hash 0 # rjenkins1 > item osd.10 weight 1.000 > item osd.11 weight 1.000 > } > ….. 
> ctnr cld-osd41-56-sata { > id -141 # do not change unnecessarily > # weight 10.000 > alg straw2 > hash 0 # rjenkins1 > item osd.480 weight 1.000 > item osd.481 weight 1.000 > item osd.482 weight 1.000 > item osd.483 weight 1.000 > item osd.484 weight 1.000 > item osd.485 weight 1.000 > item osd.486 weight 1.000 > item osd.487 weight 1.000 > item osd.488 weight 1.000 > item osd.489 weight 1.000 > } > ctnr cld-osd41-56-ssd { > id -241 # do not change unnecessarily > # weight 2.000 > alg straw2 > hash 0 # rjenkins1 > item osd.490 weight 1.000 > item osd.491 weight 1.000 > } > ctnr cld-osd42-56-sata { > id -142 # do not change unnecessarily > # weight 10.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd29-56-sata weight 10.000 > item cld-osd30-56-sata weight 10.000 > item cld-osd31-56-sata weight 10.000 > item cld-osd32-56-sata weight 10.000 > item cld-osd33-56-sata weight 10.000 > item cld-osd34-56-sata weight 10.000 > item cld-osd35-56-sata weight 10.000 > } > > > media site1-rack1-sata { > id -11 # do not change unnecessarily > # weight 70.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd1-56-sata weight 10.000 > item cld-osd2-56-sata weight 10.000 > item cld-osd3-56-sata weight 10.000 > item cld-osd4-56-sata weight 10.000 > item cld-osd5-56-sata weight 10.000 > item cld-osd6-56-sata weight 10.000 > item cld-osd7-56-sata weight 10.000 > } > media site1-rack2-sata { > id -12 # do not change unnecessarily > # weight 70.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd8-56-sata weight 10.000 > item cld-osd9-56-sata weight 10.000 > item cld-osd10-56-sata weight 10.000 > item cld-osd11-56-sata weight 10.000 > item cld-osd12-56-sata weight 10.000 > item cld-osd13-56-sata weight 10.000 > item cld-osd14-56-sata weight 10.000 > } > media site1-rack3-sata { > id -13 # do not change unnecessarily > # weight 70.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd15-56-sata weight 10.000 > item cld-osd16-56-sata weight 10.000 > item cld-osd17-56-sata weight 10.000 > 
item cld-osd18-56-sata weight 10.000 > item cld-osd19-56-sata weight 10.000 > item cld-osd20-56-sata weight 10.000 > item cld-osd21-56-sata weight 10.000 > } > media site1-rack4-sata { > id -14 # do not change unnecessarily > # weight 70.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd22-56-sata weight 10.000 > item cld-osd23-56-sata weight 10.000 > item cld-osd24-56-sata weight 10.000 > item cld-osd25-56-sata weight 10.000 > item cld-osd26-56-sata weight 10.000 > item cld-osd27-56-sata weight 10.000 > item cld-osd28-56-sata weight 10.000 > } > media site1-rack5-sata { > id -15 # do not change unnecessarily > # weight 70.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd29-56-sata weight 10.000 > item cld-osd30-56-sata weight 10.000 > item cld-osd31-56-sata weight 10.000 > item cld-osd32-56-sata weight 10.000 > item cld-osd33-56-sata weight 10.000 > item cld-osd34-56-sata weight 10.000 > item cld-osd35-56-sata weight 10.000 > } > media site1-rack6-sata { > id -16 # do not change unnecessarily > # weight 70.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd36-56-sata weight 10.000 > item cld-osd37-56-sata weight 10.000 > item cld-osd38-56-sata weight 10.000 > item cld-osd39-56-sata weight 10.000 > item cld-osd40-56-sata weight 10.000 > item cld-osd41-56-sata weight 10.000 > item cld-osd42-56-sata weight 10.000 > } > > media site1-rack1-ssd { > id -21 # do not change unnecessarily > # weight 14.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd1-56-ssd weight 2.000 > item cld-osd2-56-ssd weight 2.000 > item cld-osd3-56-ssd weight 2.000 > item cld-osd4-56-ssd weight 2.000 > item cld-osd5-56-ssd weight 2.000 > item cld-osd6-56-ssd weight 2.000 > item cld-osd7-56-ssd weight 2.000 > item cld-osd8-56-ssd weight 2.000 > item cld-osd9-56-ssd weight 2.000 > item cld-osd10-56-ssd weight 2.000 > item cld-osd11-56-ssd weight 2.000 > item cld-osd12-56-ssd weight 2.000 > item cld-osd13-56-ssd weight 2.000 > item cld-osd14-56-ssd weight 2.000 > } > media site1-rack2-ssd 
{ > id -22 # do not change unnecessarily > # weight 14.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd15-56-ssd weight 2.000 > item cld-osd16-56-ssd weight 2.000 > item cld-osd17-56-ssd weight 2.000 > item cld-osd18-56-ssd weight 2.000 > item cld-osd19-56-ssd weight 2.000 > item cld-osd20-56-ssd weight 2.000 > item cld-osd21-56-ssd weight 2.000 > item cld-osd22-56-ssd weight 2.000 > item cld-osd23-56-ssd weight 2.000 > item cld-osd24-56-ssd weight 2.000 > item cld-osd25-56-ssd weight 2.000 > item cld-osd26-56-ssd weight 2.000 > item cld-osd27-56-ssd weight 2.000 > item cld-osd28-56-ssd weight 2.000 > } > media site1-rack3-ssd { > id -23 # do not change unnecessarily > # weight 14.000 > alg straw2 > hash 0 # rjenkins1 > item cld-osd29-56-ssd weight 2.000 > item cld-osd30-56-ssd weight 2.000 > item cld-osd31-56-ssd weight 2.000 > item cld-osd32-56-ssd weight 2.000 > item cld-osd33-56-ssd weight 2.000 > item cld-osd34-56-ssd weight 2.000 > item cld-osd35-56-ssd weight 2.000 > item cld-osd36-56-ssd weight 2.000 > item cld-osd37-56-ssd weight 2.000 > item cld-osd38-56-ssd weight 2.000 > item cld-osd39-56-ssd weight 2.000 > item cld-osd40-56-ssd weight 2.000 > item cld-osd41-56-ssd weight 2.000 > item cld-osd42-56-ssd weight 2.000 > } > mediagroup site1-sata { > id -5 # do not change unnecessarily > # weight 420.000 > alg straw2 > hash 0 # rjenkins1 > item site1-rack1-sata weight 70.000 > item site1-rack2-sata weight 70.000 > item site1-rack3-sata weight 70.000 > item site1-rack4-sata weight 70.000 > item site1-rack5-sata weight 70.000 > item site1-rack6-sata weight 70.000 > } > mediagroup site1-ssd { > id -6 # do not change unnecessarily > # weight 84.000 > alg straw2 > hash 0 # rjenkins1 > item site1-rack1-ssd weight 28.000 > item site1-rack2-ssd weight 28.000 > item site1-rack3-ssd weight 28.000 > } > > unit site1 { > id -2 # do not change unnecessarily > # weight 504.000 > alg straw2 > hash 0 # rjenkins1 > item site1-sata weight 420.000 > item site1-ssd weight 
84.000 > } > > root default { > id -1 # do not change unnecessarily > # weight 504.000 > alg straw2 > hash 0 # rjenkins1 > item site1 weight 504.000 > } > # rules > rule site1_sata_erasure_ruleset { > ruleset 0 > type erasure > min_size 3 > max_size 6 > step set_chooseleaf_tries 5 > step set_choose_tries 100 > step take site1-sata > step choose indep 0 type media > step chooseleaf indep 1 type ctnr > step emit > } > rule site1_ssd_replicated_ruleset { > ruleset 1 > type replicated > min_size 1 > max_size 10 > step take site1-ssd > step choose firstn 0 type media > step chooseleaf firstn 1 type ctnr > step emit > } > # end crush map > > _______________________________________________ > ceph-users mailing list > [email protected] > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com >
_______________________________________________ ceph-users mailing list [email protected] http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
