Hi Karan,
root@ceph-admin-storage:~/ceph-cluster/crush-map-4-ceph-user-list# ceph osd
getcrushmap -o crushmap.bin
got crush map from osdmap epoch 30748
root@ceph-admin-storage:~/ceph-cluster/crush-map-4-ceph-user-list# crushtool -d
crushmap.bin -o crushmap.txt
root@ceph-admin-storage:~/ceph-cluster/crush-map-4-ceph-user-list# cat
crushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 osd.16
device 17 osd.17
device 18 osd.18
device 19 osd.19
device 20 osd.20
device 21 device21
device 22 osd.22
device 23 osd.23
device 24 osd.24
device 25 osd.25
device 26 osd.26
device 27 device27
device 28 osd.28
device 29 osd.29
device 30 osd.30
device 31 osd.31
device 32 osd.32
device 33 osd.33
device 34 osd.34
device 35 osd.35
device 36 osd.36
device 37 osd.37
device 38 osd.38
device 39 osd.39
device 40 device40
device 41 device41
device 42 osd.42
device 43 osd.43
device 44 osd.44
device 45 osd.45
device 46 osd.46
device 47 osd.47
device 48 osd.48
device 49 osd.49
device 50 osd.50
device 51 osd.51
device 52 osd.52
device 53 osd.53
device 54 osd.54
device 55 osd.55
device 56 osd.56
device 57 osd.57
device 58 osd.58
# types
type 0 osd
type 1 host
type 2 rack
type 3 row
type 4 room
type 5 datacenter
type 6 root
# buckets
host ceph-1-storage {
id -2 # do not change unnecessarily
# weight 19.330
alg straw
hash 0 # rjenkins1
item osd.0 weight 0.910
item osd.2 weight 0.910
item osd.3 weight 0.910
item osd.4 weight 1.820
item osd.9 weight 1.360
item osd.11 weight 0.680
item osd.6 weight 3.640
item osd.5 weight 1.820
item osd.7 weight 3.640
item osd.8 weight 3.640
}
host ceph-2-storage {
id -3 # do not change unnecessarily
# weight 20.000
alg straw
hash 0 # rjenkins1
item osd.14 weight 3.640
item osd.18 weight 1.360
item osd.19 weight 1.360
item osd.15 weight 3.640
item osd.1 weight 3.640
item osd.12 weight 3.640
item osd.22 weight 0.680
item osd.23 weight 0.680
item osd.26 weight 0.680
item osd.36 weight 0.680
}
host ceph-5-storage {
id -4 # do not change unnecessarily
# weight 11.730
alg straw
hash 0 # rjenkins1
item osd.32 weight 0.270
item osd.37 weight 0.270
item osd.42 weight 0.270
item osd.43 weight 1.820
item osd.44 weight 1.820
item osd.45 weight 1.820
item osd.46 weight 1.820
item osd.47 weight 1.820
item osd.48 weight 1.820
}
room room0 {
id -8 # do not change unnecessarily
# weight 51.060
alg straw
hash 0 # rjenkins1
item ceph-1-storage weight 19.330
item ceph-2-storage weight 20.000
item ceph-5-storage weight 11.730
}
host ceph-3-storage {
id -5 # do not change unnecessarily
# weight 15.920
alg straw
hash 0 # rjenkins1
item osd.24 weight 1.820
item osd.25 weight 1.820
item osd.29 weight 1.360
item osd.10 weight 3.640
item osd.13 weight 3.640
item osd.20 weight 3.640
}
host ceph-4-storage {
id -6 # do not change unnecessarily
# weight 20.000
alg straw
hash 0 # rjenkins1
item osd.34 weight 3.640
item osd.38 weight 1.360
item osd.39 weight 1.360
item osd.16 weight 3.640
item osd.30 weight 0.680
item osd.35 weight 3.640
item osd.17 weight 3.640
item osd.28 weight 0.680
item osd.31 weight 0.680
item osd.33 weight 0.680
}
host ceph-6-storage {
id -7 # do not change unnecessarily
# weight 12.720
alg straw
hash 0 # rjenkins1
item osd.49 weight 0.450
item osd.50 weight 0.450
item osd.51 weight 0.450
item osd.52 weight 0.450
item osd.53 weight 1.820
item osd.54 weight 1.820
item osd.55 weight 1.820
item osd.56 weight 1.820
item osd.57 weight 1.820
item osd.58 weight 1.820
}
room room1 {
id -9 # do not change unnecessarily
# weight 48.640
alg straw
hash 0 # rjenkins1
item ceph-3-storage weight 15.920
item ceph-4-storage weight 20.000
item ceph-6-storage weight 12.720
}
root default {
id -1 # do not change unnecessarily
# weight 99.700
alg straw
hash 0 # rjenkins1
item room0 weight 51.060
item room1 weight 48.640
}
# rules
rule data {
ruleset 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule metadata {
ruleset 1
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule rbd {
ruleset 2
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
# end crush map
root@ceph-admin-storage:~# ceph osd dump | grep -i pool
pool 0 'data' replicated size 2 min_size 1 crush_ruleset 0 object_hash rjenkins
pg_num 2048 pgp_num 2048 last_change 4623 crash_replay_interval 45 stripe_width 0
pool 1 'metadata' replicated size 2 min_size 1 crush_ruleset 1 object_hash
rjenkins pg_num 2048 pgp_num 2048 last_change 4627 stripe_width 0
pool 2 'rbd' replicated size 2 min_size 1 crush_ruleset 2 object_hash rjenkins
pg_num 2048 pgp_num 2048 last_change 4632 stripe_width 0
Mike
________________________________
Von: Karan Singh [[email protected]]
Gesendet: Dienstag, 12. August 2014 10:35
An: Riederer, Michael
Cc: [email protected]
Betreff: Re: [ceph-users] HEALTH_WARN 4 pgs incomplete; 4 pgs stuck inactive; 4
pgs stuck unclean
Can you provide your cluster’s `ceph osd dump | grep -i pool` and crush map
output?
- Karan -
On 12 Aug 2014, at 10:40, Riederer, Michael
<[email protected]<mailto:[email protected]>> wrote:
Hi all,
How do I get my Ceph Cluster back to a healthy state?
root@ceph-admin-storage:~# ceph -v
ceph version 0.80.5 (38b73c67d375a2552d8ed67843c8a65c2c0feba6)
root@ceph-admin-storage:~# ceph -s
cluster 6b481875-8be5-4508-b075-e1f660fd7b33
health HEALTH_WARN 4 pgs incomplete; 4 pgs stuck inactive; 4 pgs stuck
unclean
monmap e2: 3 mons at
{ceph-1-storage=10.65.150.101:6789/0,ceph-2-storage=10.65.150.102:6789/0,ceph-3-storage=10.65.150.103:6789/0},
election epoch 5010, quorum 0,1,2 ceph-1-storage,ceph-2-storage,ceph-3-storage
osdmap e30748: 55 osds: 55 up, 55 in
pgmap v10800465: 6144 pgs, 3 pools, 11002 GB data, 2762 kobjects
22077 GB used, 79933 GB / 102010 GB avail
6138 active+clean
4 incomplete
2 active+clean+replay
root@ceph-admin-storage:~# ceph health detail
HEALTH_WARN 4 pgs incomplete; 4 pgs stuck inactive; 4 pgs stuck unclean
pg 2.92 is stuck inactive since forever, current state incomplete, last acting
[8,13]
pg 2.c1 is stuck inactive since forever, current state incomplete, last acting
[13,7]
pg 2.e3 is stuck inactive since forever, current state incomplete, last acting
[20,7]
pg 2.587 is stuck inactive since forever, current state incomplete, last acting
[13,5]
pg 2.92 is stuck unclean since forever, current state incomplete, last acting
[8,13]
pg 2.c1 is stuck unclean since forever, current state incomplete, last acting
[13,7]
pg 2.e3 is stuck unclean since forever, current state incomplete, last acting
[20,7]
pg 2.587 is stuck unclean since forever, current state incomplete, last acting
[13,5]
pg 2.587 is incomplete, acting [13,5]
pg 2.e3 is incomplete, acting [20,7]
pg 2.c1 is incomplete, acting [13,7]
pg 2.92 is incomplete, acting [8,13]
root@ceph-admin-storage:~# ceph pg dump_stuck inactive
ok
pg_stat objects mip degr unf bytes log disklog state
state_stamp v reported up up_primary acting acting_primary
last_scrub scrub_stamp last_deep_scrub deep_scrub_stamp
2.92 0 0 0 0 0 0 0 incomplete 2014-08-08
12:39:20.204592 0'0 30748:7729 [8,13] 8 [8,13] 8
13503'1390419 2014-06-26 01:57:48.727625 13503'1390419 2014-06-22
01:57:30.114186
2.c1 0 0 0 0 0 0 0 incomplete 2014-08-08
12:39:18.846542 0'0 30748:7117 [13,7] 13 [13,7] 13
13503'1687017 2014-06-26 20:52:51.249864 13503'1687017 2014-06-22
14:24:22.633554
2.e3 0 0 0 0 0 0 0 incomplete 2014-08-08
12:39:29.311552 0'0 30748:8027 [20,7] 20 [20,7] 20
13503'1398727 2014-06-26 07:03:25.899254 13503'1398727 2014-06-21
07:02:31.393053
2.587 0 0 0 0 0 0 0 incomplete 2014-08-08
12:39:19.715724 0'0 30748:7060 [13,5] 13 [13,5] 13
13646'1542934 2014-06-26 07:48:42.089935 13646'1542934 2014-06-22
07:46:20.363695
root@ceph-admin-storage:~# ceph osd tree
# id weight type name up/down reweight
-1 99.7 root default
-8 51.06 room room0
-2 19.33 host ceph-1-storage
0 0.91 osd.0 up 1
2 0.91 osd.2 up 1
3 0.91 osd.3 up 1
4 1.82 osd.4 up 1
9 1.36 osd.9 up 1
11 0.68 osd.11 up 1
6 3.64 osd.6 up 1
5 1.82 osd.5 up 1
7 3.64 osd.7 up 1
8 3.64 osd.8 up 1
-3 20 host ceph-2-storage
14 3.64 osd.14 up 1
18 1.36 osd.18 up 1
19 1.36 osd.19 up 1
15 3.64 osd.15 up 1
1 3.64 osd.1 up 1
12 3.64 osd.12 up 1
22 0.68 osd.22 up 1
23 0.68 osd.23 up 1
26 0.68 osd.26 up 1
36 0.68 osd.36 up 1
-4 11.73 host ceph-5-storage
32 0.27 osd.32 up 1
37 0.27 osd.37 up 1
42 0.27 osd.42 up 1
43 1.82 osd.43 up 1
44 1.82 osd.44 up 1
45 1.82 osd.45 up 1
46 1.82 osd.46 up 1
47 1.82 osd.47 up 1
48 1.82 osd.48 up 1
-9 48.64 room room1
-5 15.92 host ceph-3-storage
24 1.82 osd.24 up 1
25 1.82 osd.25 up 1
29 1.36 osd.29 up 1
10 3.64 osd.10 up 1
13 3.64 osd.13 up 1
20 3.64 osd.20 up 1
-6 20 host ceph-4-storage
34 3.64 osd.34 up 1
38 1.36 osd.38 up 1
39 1.36 osd.39 up 1
16 3.64 osd.16 up 1
30 0.68 osd.30 up 1
35 3.64 osd.35 up 1
17 3.64 osd.17 up 1
28 0.68 osd.28 up 1
31 0.68 osd.31 up 1
33 0.68 osd.33 up 1
-7 12.72 host ceph-6-storage
49 0.45 osd.49 up 1
50 0.45 osd.50 up 1
51 0.45 osd.51 up 1
52 0.45 osd.52 up 1
53 1.82 osd.53 up 1
54 1.82 osd.54 up 1
55 1.82 osd.55 up 1
56 1.82 osd.56 up 1
57 1.82 osd.57 up 1
58 1.82 osd.58 up 1
What I have tried so far:
ceph pg repair 2.587 [2.e3 2.c1 2.92]
ceph pg force_create_pg 2.587 [2.e3 2.c1 2.92]
ceph osd lost 5 --yes-i-really-mean-it [7 8 13 20]
The history in brief:
I installed Cuttlefish and updated to Dumpling and then to Emperor. The cluster
was healthy. Maybe I made a mistake during the repair of 8 broken OSDs, but from
then on I had incomplete pgs. Finally, I updated from Emperor to Firefly.
Regards,
Mike
--------------------------------------------------------------------------------------------------
Bayerischer Rundfunk; Rundfunkplatz 1; 80335 München
Telefon: +49 89 590001; E-Mail: [email protected]<mailto:[email protected]>; Website:
http://www.BR.de<http://www.br.de/>
_______________________________________________
ceph-users mailing list
[email protected]<mailto:[email protected]>
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
--------------------------------------------------------------------------------------------------
Bayerischer Rundfunk; Rundfunkplatz 1; 80335 München
Telefon: +49 89 590001; E-Mail: [email protected]; Website: http://www.BR.de
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com