Hi Varada,
On Mon, Dec 14, 2015 at 03:23:20AM +0000, Varada Kari wrote:
> Can you get the details of
>
> 1. ceph health detail
> 2. ceph pg query <pg-num>
>
> of any one PG stuck peering
>
>
> Varada
The full health detail is over 9000 lines, but here's a summary:
# ceph health detail | head
HEALTH_WARN 3072 pgs peering; 3072 pgs stuck inactive; 3072 pgs stuck unclean;
1570 requests are blocked > 32 sec; 25 osds have slow requests; noout flag(s)
set
pg 3.1ae is stuck inactive for 23264.342056, current state peering, last acting
[16,4,8]
pg 2.1af is stuck inactive for 23621.565024, current state peering, last acting
[6,0]
pg 6.1ab is stuck inactive for 22843.875498, current state peering, last acting
[27,18,54]
pg 3.1af is stuck inactive for 23315.971276, current state peering, last acting
[17,16,24]
pg 2.1ae is stuck inactive for 19278.004657, current state peering, last acting
[7,1]
pg 6.1aa is stuck inactive for 19321.668092, current state peering, last acting
[31,39,56]
pg 3.1a8 is stuck inactive for 22897.969982, current state peering, last acting
[16,17,24]
pg 2.1a9 is stuck inactive for 23516.554757, current state peering, last acting
[14,7]
pg 6.1ad is stuck inactive for 23105.915508, current state peering, last acting
[33,47,20]
# ceph health detail | grep -v peering
34 ops are blocked > 16777.2 sec
1289 ops are blocked > 8388.61 sec
50 ops are blocked > 4194.3 sec
34 ops are blocked > 2097.15 sec
68 ops are blocked > 1048.58 sec
13 ops are blocked > 524.288 sec
11 ops are blocked > 16777.2 sec on osd.0
4 ops are blocked > 8388.61 sec on osd.0
5 ops are blocked > 8388.61 sec on osd.1
100 ops are blocked > 8388.61 sec on osd.2
100 ops are blocked > 8388.61 sec on osd.3
100 ops are blocked > 8388.61 sec on osd.4
80 ops are blocked > 8388.61 sec on osd.5
34 ops are blocked > 8388.61 sec on osd.6
27 ops are blocked > 4194.3 sec on osd.6
15 ops are blocked > 2097.15 sec on osd.6
6 ops are blocked > 1048.58 sec on osd.6
9 ops are blocked > 524.288 sec on osd.6
2 ops are blocked > 16777.2 sec on osd.7
20 ops are blocked > 4194.3 sec on osd.7
16 ops are blocked > 2097.15 sec on osd.7
62 ops are blocked > 1048.58 sec on osd.7
85 ops are blocked > 8388.61 sec on osd.8
80 ops are blocked > 8388.61 sec on osd.9
13 ops are blocked > 16777.2 sec on osd.10
3 ops are blocked > 8388.61 sec on osd.10
1 ops are blocked > 4194.3 sec on osd.10
1 ops are blocked > 2097.15 sec on osd.10
6 ops are blocked > 8388.61 sec on osd.11
5 ops are blocked > 8388.61 sec on osd.12
4 ops are blocked > 8388.61 sec on osd.13
2 ops are blocked > 8388.61 sec on osd.14
4 ops are blocked > 524.288 sec on osd.14
7 ops are blocked > 16777.2 sec on osd.15
12 ops are blocked > 8388.61 sec on osd.15
2 ops are blocked > 4194.3 sec on osd.15
2 ops are blocked > 2097.15 sec on osd.15
100 ops are blocked > 8388.61 sec on osd.16
82 ops are blocked > 8388.61 sec on osd.17
1 ops are blocked > 16777.2 sec on osd.18
100 ops are blocked > 8388.61 sec on osd.21
86 ops are blocked > 8388.61 sec on osd.24
100 ops are blocked > 8388.61 sec on osd.38
100 ops are blocked > 8388.61 sec on osd.42
100 ops are blocked > 8388.61 sec on osd.44
1 ops are blocked > 8388.61 sec on osd.51
25 osds have slow requests
noout flag(s) set
# ceph pg 3.1ae query
<<< hung until interrupted with ^C >>>
# ceph pg 2.1af query
{
"state": "peering",
"snap_trimq": "[]",
"epoch": 357236,
"up": [
6,
0
],
"acting": [
6,
0
],
"info": {
"pgid": "2.1af",
"last_update": "356361'1923761",
"last_complete": "356361'1923761",
"log_tail": "341349'1920757",
"last_user_version": 1923761,
"last_backfill": "MAX",
"purged_snaps": "[1~34,38~1b,55~2,59~2a,84~68,ee~62]",
"history": {
"epoch_created": 1,
"last_epoch_started": 356496,
"last_epoch_clean": 356496,
"last_epoch_split": 0,
"same_up_since": 357218,
"same_interval_since": 357218,
"same_primary_since": 357218,
"last_scrub": "356347'1923757",
"last_scrub_stamp": "2015-12-12 12:18:54.719534",
"last_deep_scrub": "356347'1923757",
"last_deep_scrub_stamp": "2015-12-12 12:18:54.719534",
"last_clean_scrub_stamp": "2015-12-12 12:18:54.719534"
},
"stats": {
"version": "356361'1923761",
"reported_seq": "37552607",
"reported_epoch": "357218",
"state": "peering",
"last_fresh": "2015-12-14 12:54:41.084804",
"last_change": "2015-12-14 12:54:41.084804",
"last_active": "2015-12-14 07:53:05.850772",
"last_peered": "2015-12-14 07:53:05.850772",
"last_clean": "2015-12-14 07:53:05.850772",
"last_became_active": "2013-09-11 09:13:39.309600",
"last_became_peered": "2013-09-11 09:13:39.309600",
"last_unstale": "2015-12-14 12:54:41.084804",
"last_undegraded": "2015-12-14 12:54:41.084804",
"last_fullsized": "2015-12-14 12:54:41.084804",
"mapping_epoch": 357168,
"log_start": "341349'1920757",
"ondisk_log_start": "341349'1920757",
"created": 1,
"last_epoch_clean": 356496,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "356347'1923757",
"last_scrub_stamp": "2015-12-12 12:18:54.719534",
"last_deep_scrub": "356347'1923757",
"last_deep_scrub_stamp": "2015-12-12 12:18:54.719534",
"last_clean_scrub_stamp": "2015-12-12 12:18:54.719534",
"log_size": 3004,
"ondisk_log_size": 3004,
"stats_invalid": "0",
"stat_sum": {
"num_bytes": 7360028160,
"num_objects": 2107,
"num_object_clones": 642,
"num_object_copies": 4214,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 569,
"num_whiteouts": 0,
"num_read": 726240,
"num_read_kb": 31291910,
"num_write": 127250,
"num_write_kb": 13514083,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 2187,
"num_bytes_recovered": 9137582592,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0
},
"up": [
6,
0
],
"acting": [
6,
0
],
"blocked_by": [
0
],
"up_primary": 6,
"acting_primary": 6
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 356496,
"hit_set_history": {
"current_last_update": "0'0",
"current_last_stamp": "0.000000",
"current_info": {
"begin": "0.000000",
"end": "0.000000",
"version": "0'0"
},
"history": []
}
},
"peer_info": [],
"recovery_state": [
{
"name": "Started\/Primary\/Peering\/GetInfo",
"enter_time": "2015-12-14 12:54:41.084784",
"requested_info_from": [
{
"osd": "0"
}
]
},
{
"name": "Started\/Primary\/Peering",
"enter_time": "2015-12-14 12:54:41.084773",
"past_intervals": [
{
"first": 356495,
"last": 356560,
"maybe_went_rw": 1,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 356561,
"last": 356608,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 356609,
"last": 356655,
"maybe_went_rw": 1,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 356656,
"last": 356670,
"maybe_went_rw": 1,
"up": [
6
],
"acting": [
6
],
"primary": 6,
"up_primary": 6
},
{
"first": 356671,
"last": 356681,
"maybe_went_rw": 1,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 356682,
"last": 356722,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 356723,
"last": 356723,
"maybe_went_rw": 0,
"up": [],
"acting": [],
"primary": -1,
"up_primary": -1
},
{
"first": 356724,
"last": 356824,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 356825,
"last": 356876,
"maybe_went_rw": 1,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 356877,
"last": 356920,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 356921,
"last": 356921,
"maybe_went_rw": 0,
"up": [],
"acting": [],
"primary": -1,
"up_primary": -1
},
{
"first": 356922,
"last": 356958,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 356959,
"last": 356963,
"maybe_went_rw": 1,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 356964,
"last": 357025,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 357026,
"last": 357026,
"maybe_went_rw": 0,
"up": [],
"acting": [],
"primary": -1,
"up_primary": -1
},
{
"first": 357027,
"last": 357041,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 357042,
"last": 357081,
"maybe_went_rw": 1,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 357082,
"last": 357082,
"maybe_went_rw": 0,
"up": [
6
],
"acting": [
6
],
"primary": 6,
"up_primary": 6
},
{
"first": 357083,
"last": 357088,
"maybe_went_rw": 0,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 357089,
"last": 357089,
"maybe_went_rw": 0,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
},
{
"first": 357090,
"last": 357167,
"maybe_went_rw": 1,
"up": [
6,
0
],
"acting": [
6,
0
],
"primary": 6,
"up_primary": 6
},
{
"first": 357168,
"last": 357217,
"maybe_went_rw": 1,
"up": [
0
],
"acting": [
0
],
"primary": 0,
"up_primary": 0
}
],
"probing_osds": [
"0",
"6"
],
"down_osds_we_would_probe": [],
"peering_blocked_by": []
},
{
"name": "Started",
"enter_time": "2015-12-14 12:54:41.084717"
}
],
"agent_state": {}
}
Chris
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com