Hello,

Recently we hit an underlying issue with osd.10, which is mapped to /dev/sde.
So we tried to remove it from the CRUSH map:

===
#systemctl stop ceph-osd@10.service

#for x in {10..10}; do ceph osd out $x; ceph osd crush remove osd.$x; ceph auth del osd.$x; ceph osd rm osd.$x; done
marked out osd.10.
removed item id 10 name 'osd.10' from crush map
updated
removed osd.10
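
For clarity, the loop above just runs the standard removal sequence once for id 10:

#ceph osd out 10
#ceph osd crush remove osd.10
#ceph auth del osd.10
#ceph osd rm osd.10

(On Luminous or later, "ceph osd purge 10 --yes-i-really-mean-it" is supposed to cover the same out / crush remove / auth del / rm steps in a single command, but I have not tried it here.)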


I can still see the entry in the CRUSH map:
#ceph osd crush dump
<<..>>
{
            "id": 10,
            "name": "device10"
        },
<<..>>
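
To rule out a stale entry in the OSD map itself (just a guess on my part, not confirmed as the cause), a quick check like the following should show whether osd.10 still exists there as well:

#ceph osd ls | grep -w 10
#ceph osd dump | grep -w osd.10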

Then I tried to remove it manually with crushtool, using the steps below.

#ceph osd getcrushmap -o /tmp/test.map
#crushtool -d /tmp/test.map -o /tmp/test1.map

Opened /tmp/test1.map and removed the entry below:
#vim /tmp/test1.map
<..>
device 9 osd.9
device 10 device10        <<-- removed this entry
device 11 osd.11
<..>

#crushtool -c /tmp/test1.map -o /tmp/test2.map
#ceph osd setcrushmap -i /tmp/test2.map  -- Reinject to crush
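
For what it's worth, the edited map can be sanity-checked before injection by decompiling it again and doing a dry run of the mappings. This is only a sketch: /tmp/verify.txt is an example file name, and --rule 1 / --num-rep 5 are guesses based on the 5-wide EC placements shown further down:

#crushtool -d /tmp/test2.map -o /tmp/verify.txt
#grep device10 /tmp/verify.txt
#crushtool -i /tmp/test2.map --test --rule 1 --num-rep 5 --show-mappings | head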


I can still see the device10 entry in the CRUSH map:
#ceph osd crush dump 2> /dev/null | grep device10
            "name": "device10"

I even tried the commands below, with no luck:
#ceph osd crush rm osd.10
#ceph osd crush rm 10
#ceph osd crush rm device0


Due to this issue, 9 PGs got affected and landed in the "remapped+incomplete" state.

# for i in `cat test`; do  ceph pg map $i 2> /dev/null; done
osdmap e2443 pg 3.d9 (3.d9) -> up [8,63,77,35,117] acting
[2147483647,63,77,2147483647,117]
osdmap e2443 pg 3.9f (3.9f) -> up [80,47,116,19,3] acting
[80,2147483647,116,2147483647,3]
osdmap e2443 pg 3.7fe (3.7fe) -> up [17,27,93,23,102] acting
[17,27,93,2147483647,102]
osdmap e2443 pg 3.32f (3.32f) -> up [64,69,94,111,20] acting
[2147483647,69,94,111,2147483647]
osdmap e2443 pg 3.34f (3.34f) -> up [102,25,90,1,24] acting
[102,2147483647,90,2147483647,24]
osdmap e2443 pg 3.176 (3.176) -> up [9,2,107,13,91] acting
[9,2,107,2147483647,91]
osdmap e2443 pg 3.10e (3.10e) -> up [88,61,21,59,100] acting
[2147483647,2147483647,21,2147483647,2147483647]
osdmap e2443 pg 3.48 (3.48) -> up [114,18,32,90,8] acting
[114,18,2147483647,90,8]
osdmap e2443 pg 3.71a (3.71a) -> up [3,78,58,71,116] acting
[3,78,58,2147483647,116]
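
(2147483647 is 2^31 - 1, the placeholder value CRUSH returns when it cannot map any OSD into a slot, so each such entry above is a placement slot with no OSD assigned. A quick way to count how many of the listed PGs have at least one missing slot, assuming the PG ids are in the file "test":)

# for i in `cat test`; do ceph pg map $i 2> /dev/null; done | grep -c 2147483647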

#  ceph pg  $i query 2> /dev/null| grep -w -A1 "blocked_by\"\: \[" | grep -v -
            "blocked_by": [
                10  ==>>>

#ceph pg  $i query 2> /dev/null| grep -w -A1 down_osds_we_would_probe
            "down_osds_we_would_probe": [  -->>
                10
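
The two queries above were run per PG; to dump both fields for all nine PGs in one go (again assuming the ids are in the file "test"), a small loop like this should work:

# for i in `cat test`; do echo "== $i =="; ceph pg $i query 2> /dev/null | grep -w -A1 -e blocked_by -e down_osds_we_would_probe; done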

Then I tried to recreate the PGs using the command below:

#ceph pg force_create_pg <pgid>
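
i.e., something along the lines of the following, with the PG ids again taken from the "test" file:

# for i in `cat test`; do ceph pg force_create_pg $i; done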

Still no luck ...

So osd.10 is still present in the CRUSH map, which is why I'm unable to recover these 9 PGs. And whenever we reboot the node hosting osd.10, the OSD joins back into the cluster again, which is weird.
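
My guess (not confirmed) is that at boot the ceph-osd@10 unit, or the ceph-disk udev activation of the still-prepared /dev/sde, starts the daemon again, and with osd_crush_update_on_start left at its default the OSD re-creates its own CRUSH entry. If that is the case, disabling the unit on that node should at least keep it from rejoining while this is sorted out:

#systemctl disable ceph-osd@10.service
#systemctl mask ceph-osd@10.service

(The noin / noup cluster flags, e.g. "ceph osd set noin", would be a heavier-handed alternative, since they affect every OSD.)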

Please share your comments on how to forcefully remove the device10 / osd.10 entry from the CRUSH map.

The decompiled crushmap is attached below.

Thanks
Jayaram
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 device10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 osd.16
device 17 osd.17
device 18 osd.18
device 19 osd.19
device 20 osd.20
device 21 osd.21
device 22 osd.22
device 23 osd.23
device 24 osd.24
device 25 osd.25
device 26 osd.26
device 27 osd.27
device 28 osd.28
device 29 osd.29
device 30 osd.30
device 31 osd.31
device 32 osd.32
device 33 osd.33
device 34 osd.34
device 35 osd.35
device 36 osd.36
device 37 osd.37
device 38 osd.38
device 39 osd.39
device 40 osd.40
device 41 osd.41
device 42 osd.42
device 43 osd.43
device 44 osd.44
device 45 osd.45
device 46 osd.46
device 47 osd.47
device 48 osd.48
device 49 osd.49
device 50 osd.50
device 51 osd.51
device 52 osd.52
device 53 osd.53
device 54 osd.54
device 55 osd.55
device 56 osd.56
device 57 osd.57
device 58 osd.58
device 59 osd.59
device 60 osd.60
device 61 osd.61
device 62 osd.62
device 63 osd.63
device 64 osd.64
device 65 osd.65
device 66 osd.66
device 67 osd.67
device 68 osd.68
device 69 osd.69
device 70 osd.70
device 71 osd.71
device 72 osd.72
device 73 osd.73
device 74 osd.74
device 75 osd.75
device 76 osd.76
device 77 osd.77
device 78 osd.78
device 79 osd.79
device 80 osd.80
device 81 osd.81
device 82 osd.82
device 83 osd.83
device 84 osd.84
device 85 osd.85
device 86 osd.86
device 87 osd.87
device 88 osd.88
device 89 osd.89
device 90 osd.90
device 91 osd.91
device 92 osd.92
device 93 osd.93
device 94 osd.94
device 95 osd.95
device 96 osd.96
device 97 osd.97
device 98 osd.98
device 99 osd.99
device 100 osd.100
device 101 osd.101
device 102 osd.102
device 103 osd.103
device 104 osd.104
device 105 osd.105
device 106 osd.106
device 107 osd.107
device 108 osd.108
device 109 osd.109
device 110 osd.110
device 111 osd.111
device 112 osd.112
device 113 osd.113
device 114 osd.114
device 115 osd.115
device 116 osd.116
device 117 osd.117
device 118 osd.118
device 119 osd.119

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host cn3 {
        id -2           # do not change unnecessarily
        # weight 130.992
        alg straw2
        hash 0  # rjenkins1
        item osd.0 weight 5.458
        item osd.3 weight 5.458
        item osd.6 weight 5.458
        item osd.9 weight 5.458
        item osd.12 weight 5.458
        item osd.15 weight 5.458
        item osd.18 weight 5.458
        item osd.21 weight 5.458
        item osd.24 weight 5.458
        item osd.27 weight 5.458
        item osd.30 weight 5.458
        item osd.33 weight 5.458
        item osd.36 weight 5.458
        item osd.39 weight 5.458
        item osd.42 weight 5.458
        item osd.45 weight 5.458
        item osd.48 weight 5.458
        item osd.51 weight 5.458
        item osd.54 weight 5.458
        item osd.57 weight 5.458
        item osd.60 weight 5.458
        item osd.63 weight 5.458
        item osd.66 weight 5.458
        item osd.69 weight 5.458
}
host cn2 {
        id -3           # do not change unnecessarily
        # weight 125.534
        alg straw2
        hash 0  # rjenkins1
        item osd.1 weight 5.458
        item osd.4 weight 5.458
        item osd.7 weight 5.458
        item osd.13 weight 5.458
        item osd.16 weight 5.458
        item osd.20 weight 5.458
        item osd.23 weight 5.458
        item osd.26 weight 5.458
        item osd.29 weight 5.458
        item osd.32 weight 5.458
        item osd.35 weight 5.458
        item osd.38 weight 5.458
        item osd.41 weight 5.458
        item osd.44 weight 5.458
        item osd.47 weight 5.458
        item osd.50 weight 5.458
        item osd.53 weight 5.458
        item osd.56 weight 5.458
        item osd.59 weight 5.458
        item osd.62 weight 5.458
        item osd.65 weight 5.458
        item osd.68 weight 5.458
        item osd.71 weight 5.458
}
host cn1 {
        id -4           # do not change unnecessarily
        # weight 130.992
        alg straw2
        hash 0  # rjenkins1
        item osd.2 weight 5.458
        item osd.5 weight 5.458
        item osd.8 weight 5.458
        item osd.11 weight 5.458
        item osd.14 weight 5.458
        item osd.17 weight 5.458
        item osd.19 weight 5.458
        item osd.22 weight 5.458
        item osd.25 weight 5.458
        item osd.28 weight 5.458
        item osd.31 weight 5.458
        item osd.34 weight 5.458
        item osd.37 weight 5.458
        item osd.40 weight 5.458
        item osd.43 weight 5.458
        item osd.46 weight 5.458
        item osd.49 weight 5.458
        item osd.52 weight 5.458
        item osd.55 weight 5.458
        item osd.58 weight 5.458
        item osd.61 weight 5.458
        item osd.64 weight 5.458
        item osd.67 weight 5.458
        item osd.70 weight 5.458
}
host cn4 {
        id -5           # do not change unnecessarily
        # weight 130.992
        alg straw2
        hash 0  # rjenkins1
        item osd.72 weight 5.458
        item osd.73 weight 5.458
        item osd.74 weight 5.458
        item osd.75 weight 5.458
        item osd.76 weight 5.458
        item osd.77 weight 5.458
        item osd.78 weight 5.458
        item osd.79 weight 5.458
        item osd.80 weight 5.458
        item osd.81 weight 5.458
        item osd.82 weight 5.458
        item osd.83 weight 5.458
        item osd.84 weight 5.458
        item osd.85 weight 5.458
        item osd.86 weight 5.458
        item osd.87 weight 5.458
        item osd.88 weight 5.458
        item osd.89 weight 5.458
        item osd.90 weight 5.458
        item osd.91 weight 5.458
        item osd.92 weight 5.458
        item osd.93 weight 5.458
        item osd.94 weight 5.458
        item osd.95 weight 5.458
}
host cn5 {
        id -6           # do not change unnecessarily
        # weight 130.992
        alg straw2
        hash 0  # rjenkins1
        item osd.96 weight 5.458
        item osd.97 weight 5.458
        item osd.98 weight 5.458
        item osd.99 weight 5.458
        item osd.100 weight 5.458
        item osd.101 weight 5.458
        item osd.102 weight 5.458
        item osd.103 weight 5.458
        item osd.104 weight 5.458
        item osd.105 weight 5.458
        item osd.106 weight 5.458
        item osd.107 weight 5.458
        item osd.108 weight 5.458
        item osd.109 weight 5.458
        item osd.110 weight 5.458
        item osd.111 weight 5.458
        item osd.112 weight 5.458
        item osd.113 weight 5.458
        item osd.114 weight 5.458
        item osd.115 weight 5.458
        item osd.116 weight 5.458
        item osd.117 weight 5.458
        item osd.118 weight 5.458
        item osd.119 weight 5.458
}
root default {
        id -1           # do not change unnecessarily
        # weight 649.490
        alg straw
        hash 0  # rjenkins1
        item cn3 weight 130.989
        item cn2 weight 125.534
        item cn1 weight 130.989
        item cn4 weight 130.989
        item cn5 weight 130.989
}

# rules
rule replicated_ruleset {
        ruleset 0
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule cdvr_ec {
        ruleset 1
        type erasure
        min_size 3
        max_size 5
        step set_chooseleaf_tries 5
        step set_choose_tries 100
        step take default
        step chooseleaf indep 0 type host
        step emit
}
rule cdvr_ec1 {
        ruleset 2
        type erasure
        min_size 3
        max_size 5
        step set_chooseleaf_tries 5
        step set_choose_tries 100
        step take default
        step chooseleaf indep 0 type host
        step emit
}

# end crush map