Whoops, I didn't mean to attach the file as RTF. Plaintext version attached:
{ "state": "incomplete",
"epoch": 224352,
"up": [
55,
89],
"acting": [
55,
89],
"info": { "pgid": "2.525",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"purged_snaps": "[]",
"history": { "epoch_created": 1,
"last_epoch_started": 222188,
"last_epoch_clean": 221446,
"last_epoch_split": 0,
"same_up_since": 224291,
"same_interval_since": 224291,
"same_primary_since": 224212,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355"},
"stats": { "version": "0'0",
"reported_seq": "2597",
"reported_epoch": "224352",
"state": "incomplete",
"last_fresh": "2014-10-21 11:51:08.013648",
"last_change": "2014-10-21 11:51:08.013648",
"last_active": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_unstale": "2014-10-21 11:51:08.013648",
"mapping_epoch": 224290,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 1,
"last_epoch_clean": 221446,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": "0",
"stat_sum": { "num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0},
"stat_cat_sum": {},
"up": [
55,
89],
"acting": [
55,
89],
"up_primary": 55,
"acting_primary": 55},
"empty": 1,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": { "current_last_update": "0'0",
"current_last_stamp": "0.000000",
"current_info": { "begin": "0.000000",
"end": "0.000000",
"version": "0'0"},
"history": []}},
"peer_info": [
{ "peer": "24",
"pgid": "2.525",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"purged_snaps": "[]",
"history": { "epoch_created": 1,
"last_epoch_started": 222188,
"last_epoch_clean": 221446,
"last_epoch_split": 0,
"same_up_since": 224291,
"same_interval_since": 224291,
"same_primary_since": 224212,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355"},
"stats": { "version": "0'0",
"reported_seq": "127",
"reported_epoch": "222817",
"state": "down+incomplete",
"last_fresh": "2014-10-20 19:08:54.228275",
"last_change": "2014-10-20 19:08:54.228275",
"last_active": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_unstale": "2014-10-20 19:08:54.228275",
"mapping_epoch": 224290,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 1,
"last_epoch_clean": 221446,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": "0",
"stat_sum": { "num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0},
"stat_cat_sum": {},
"up": [
55,
89],
"acting": [
55,
89],
"up_primary": 55,
"acting_primary": 55},
"empty": 1,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": { "current_last_update": "0'0",
"current_last_stamp": "0.000000",
"current_info": { "begin": "0.000000",
"end": "0.000000",
"version": "0'0"},
"history": []}},
{ "peer": "33",
"pgid": "2.525",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"purged_snaps": "[]",
"history": { "epoch_created": 0,
"last_epoch_started": 0,
"last_epoch_clean": 0,
"last_epoch_split": 0,
"same_up_since": 0,
"same_interval_since": 0,
"same_primary_since": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000"},
"stats": { "version": "0'0",
"reported_seq": "0",
"reported_epoch": "0",
"state": "inactive",
"last_fresh": "0.000000",
"last_change": "0.000000",
"last_active": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_unstale": "0.000000",
"mapping_epoch": 0,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": "0",
"stat_sum": { "num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0},
"stat_cat_sum": {},
"up": [],
"acting": [],
"up_primary": -1,
"acting_primary": -1},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": { "current_last_update": "0'0",
"current_last_stamp": "0.000000",
"current_info": { "begin": "0.000000",
"end": "0.000000",
"version": "0'0"},
"history": []}},
{ "peer": "48",
"pgid": "2.525",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"purged_snaps": "[]",
"history": { "epoch_created": 1,
"last_epoch_started": 222188,
"last_epoch_clean": 221446,
"last_epoch_split": 0,
"same_up_since": 224291,
"same_interval_since": 224291,
"same_primary_since": 224212,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355"},
"stats": { "version": "0'0",
"reported_seq": "276",
"reported_epoch": "222579",
"state": "down+peering",
"last_fresh": "2014-10-20 17:41:28.822067",
"last_change": "2014-10-20 15:53:42.587883",
"last_active": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_unstale": "2014-10-20 17:41:28.822067",
"mapping_epoch": 224290,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 1,
"last_epoch_clean": 221446,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": "0",
"stat_sum": { "num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0},
"stat_cat_sum": {},
"up": [
55,
89],
"acting": [
55,
89],
"up_primary": 55,
"acting_primary": 55},
"empty": 1,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": { "current_last_update": "0'0",
"current_last_stamp": "0.000000",
"current_info": { "begin": "0.000000",
"end": "0.000000",
"version": "0'0"},
"history": []}},
{ "peer": "89",
"pgid": "2.525",
"last_update": "222234'589857",
"last_complete": "222234'589857",
"log_tail": "210673'586727",
"last_user_version": 589857,
"last_backfill":
"2b64bd25\/rb.0.3db5d.6b8b4567.00000009fd2a\/head\/\/2",
"purged_snaps": "[]",
"history": { "epoch_created": 1,
"last_epoch_started": 222188,
"last_epoch_clean": 221446,
"last_epoch_split": 0,
"same_up_since": 224291,
"same_interval_since": 224291,
"same_primary_since": 224212,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355"},
"stats": { "version": "222234'589857",
"reported_seq": "26",
"reported_epoch": "224211",
"state": "inactive",
"last_fresh": "2014-10-21 11:22:07.491337",
"last_change": "2014-10-21 11:22:07.491337",
"last_active": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_unstale": "2014-10-21 11:22:07.491337",
"mapping_epoch": 224290,
"log_start": "210673'586727",
"ondisk_log_start": "210673'586727",
"created": 1,
"last_epoch_clean": 221446,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "219662'589727",
"last_scrub_stamp": "2014-10-19 22:14:57.757110",
"last_deep_scrub": "210324'583295",
"last_deep_scrub_stamp": "2014-10-15 09:11:16.209355",
"last_clean_scrub_stamp": "2014-10-15 09:11:16.209355",
"log_size": 3130,
"ondisk_log_size": 3130,
"stats_invalid": "0",
"stat_sum": { "num_bytes": 38510624768,
"num_objects": 9245,
"num_object_clones": 14,
"num_object_copies": 18490,
"num_objects_missing_on_primary": 0,
"num_objects_degraded": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 9235,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 33,
"num_write_kb": 8192,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0},
"stat_cat_sum": {},
"up": [
55,
89],
"acting": [
55,
89],
"up_primary": 55,
"acting_primary": 55},
"empty": 0,
"dne": 0,
"incomplete": 1,
"last_epoch_started": 0,
"hit_set_history": { "current_last_update": "0'0",
"current_last_stamp": "0.000000",
"current_info": { "begin": "0.000000",
"end": "0.000000",
"version": "0'0"},
"history": []}}],
"recovery_state": [
{ "name": "Started\/Primary\/Peering",
"enter_time": "2014-10-21 11:51:08.013532",
"past_intervals": [
{ "first": 221445,
"last": 221446,
"maybe_went_rw": 1,
"up": [
85],
"acting": [
85,
85,
85]},
{ "first": 221447,
"last": 221447,
"maybe_went_rw": 0,
"up": [
33,
85],
"acting": [
33,
85,
33,
33]},
{ "first": 221448,
"last": 221471,
"maybe_went_rw": 1,
"up": [
33,
85],
"acting": [
85,
85,
33]},
{ "first": 221472,
"last": 221493,
"maybe_went_rw": 1,
"up": [
33],
"acting": [
33,
33,
33]},
{ "first": 221494,
"last": 221904,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 221905,
"last": 221905,
"maybe_went_rw": 0,
"up": [
33,
85],
"acting": [
33,
85,
33,
33]},
{ "first": 221906,
"last": 221906,
"maybe_went_rw": 0,
"up": [
33,
85],
"acting": [
85,
85,
33]},
{ "first": 221907,
"last": 221966,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
85,
85,
33]},
{ "first": 221967,
"last": 221968,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 221969,
"last": 221970,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
85,
85,
33]},
{ "first": 221971,
"last": 221973,
"maybe_went_rw": 0,
"up": [
24,
89],
"acting": [
85,
85,
24]},
{ "first": 221974,
"last": 221982,
"maybe_went_rw": 0,
"up": [
33,
89],
"acting": [
85,
85,
33]},
{ "first": 221983,
"last": 221984,
"maybe_went_rw": 0,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 221985,
"last": 222018,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
85,
85,
33]},
{ "first": 222019,
"last": 222055,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 222056,
"last": 222058,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
85,
24,
85,
33]},
{ "first": 222059,
"last": 222066,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
24,
24,
33]},
{ "first": 222067,
"last": 222120,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 222121,
"last": 222123,
"maybe_went_rw": 1,
"up": [
89],
"acting": [
89,
89,
89]},
{ "first": 222124,
"last": 222125,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 222126,
"last": 222176,
"maybe_went_rw": 1,
"up": [
48,
89],
"acting": [
48,
89,
48,
48]},
{ "first": 222177,
"last": 222179,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 222180,
"last": 222183,
"maybe_went_rw": 1,
"up": [
89],
"acting": [
89,
89,
89]},
{ "first": 222184,
"last": 222186,
"maybe_went_rw": 1,
"up": [
24,
89],
"acting": [
24,
89,
24,
24]},
{ "first": 222187,
"last": 222260,
"maybe_went_rw": 1,
"up": [
24,
89],
"acting": [
24,
24,
24]},
{ "first": 222261,
"last": 222262,
"maybe_went_rw": 1,
"up": [
89],
"acting": [
89,
89,
89]},
{ "first": 222263,
"last": 222342,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 222343,
"last": 222344,
"maybe_went_rw": 1,
"up": [
33],
"acting": [
33,
33,
33]},
{ "first": 222345,
"last": 222348,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 222349,
"last": 222351,
"maybe_went_rw": 1,
"up": [
89],
"acting": [
89,
89,
89]},
{ "first": 222352,
"last": 222353,
"maybe_went_rw": 1,
"up": [
33,
89],
"acting": [
33,
89,
33,
33]},
{ "first": 222354,
"last": 222355,
"maybe_went_rw": 1,
"up": [
89],
"acting": [
89,
89,
89]},
{ "first": 222356,
"last": 222579,
"maybe_went_rw": 1,
"up": [
48,
89],
"acting": [
48,
89,
48,
48]},
{ "first": 222580,
"last": 222589,
"maybe_went_rw": 1,
"up": [
24,
89],
"acting": [
24,
89,
24,
24]},
{ "first": 222590,
"last": 222656,
"maybe_went_rw": 1,
"up": [
55,
89],
"acting": [
55,
89,
55,
55]},
{ "first": 222657,
"last": 222669,
"maybe_went_rw": 1,
"up": [
55],
"acting": [
55,
55,
55]},
{ "first": 222670,
"last": 222727,
"maybe_went_rw": 1,
"up": [
55,
85],
"acting": [
55,
85,
55,
55]},
{ "first": 222728,
"last": 222742,
"maybe_went_rw": 1,
"up": [
85],
"acting": [
85,
85,
85]},
{ "first": 222743,
"last": 222746,
"maybe_went_rw": 0,
"up": [],
"acting": [
-1,
-1]},
{ "first": 222747,
"last": 222749,
"maybe_went_rw": 1,
"up": [
24],
"acting": [
24,
24,
24]},
{ "first": 222750,
"last": 222817,
"maybe_went_rw": 1,
"up": [
24,
89],
"acting": [
24,
89,
24,
24]},
{ "first": 222818,
"last": 224174,
"maybe_went_rw": 1,
"up": [
55,
89],
"acting": [
55,
89,
55,
55]},
{ "first": 224175,
"last": 224177,
"maybe_went_rw": 1,
"up": [
89],
"acting": [
89,
89,
89]},
{ "first": 224178,
"last": 224210,
"maybe_went_rw": 1,
"up": [
55,
89],
"acting": [
55,
89,
55,
55]},
{ "first": 224211,
"last": 224211,
"maybe_went_rw": 0,
"up": [
89],
"acting": [
89,
89,
89]},
{ "first": 224212,
"last": 224289,
"maybe_went_rw": 1,
"up": [
55,
89],
"acting": [
55,
89,
55,
55]},
{ "first": 224290,
"last": 224290,
"maybe_went_rw": 0,
"up": [
55],
"acting": [
55,
55,
55]}],
"probing_osds": [
"24",
"33",
"48",
"55",
"89"],
"down_osds_we_would_probe": [
85],
"peering_blocked_by": []},
{ "name": "Started",
"enter_time": "2014-10-21 11:51:08.013457"}],
"agent_state": {}}
Thanks,
Lincoln
On Oct 21, 2014, at 11:59 AM, Lincoln Bryant wrote:
> A small update on this: I rebooted all of the Ceph nodes and was then able to
> query one of the misbehaving pgs.
>
> I've attached the query for pg 2.525.
>
> <incomplete-pg-query-2.525.rtf>
>
> There are some things like this in the peer info:
>
> "up": [],
> "acting": [],
> "up_primary": -1,
> "acting_primary": -1},
>
>
> I also see things like:
> "down_osds_we_would_probe": [
> 85],
>
> But I don't have an OSD 85:
> 85 3.64 osd.85 DNE
>
> # ceph osd rm osd.85
> osd.85 does not exist.
> # ceph osd lost 85 --yes-i-really-mean-it
> osd.85 is not down or doesn't exist
>
> Any help would be greatly appreciated.
>
> Thanks,
> Lincoln
>
> On Oct 21, 2014, at 9:39 AM, Lincoln Bryant wrote:
>
>> Hi cephers,
>>
>> We have two pgs that are stuck in 'incomplete' state across two different
>> pools:
>> pg 2.525 is stuck inactive since forever, current state incomplete, last
>> acting [55,89]
>> pg 0.527 is stuck inactive since forever, current state incomplete, last
>> acting [55,89]
>> pg 0.527 is stuck unclean since forever, current state incomplete, last
>> acting [55,89]
>> pg 2.525 is stuck unclean since forever, current state incomplete, last
>> acting [55,89]
>> pg 0.527 is incomplete, acting [55,89]
>> pg 2.525 is incomplete, acting [55,89]
>>
>> Basically, we ran into a problem where we had 2x replication and 2 disks on
>> different machines died near-simultaneously, and my pgs were stuck in
>> 'down+peering'. I had to do some combination of declaring the OSDs as lost,
>> and running 'force_create_pg'. I realize the data on those pgs is now lost,
>> but I'm stuck as to how to get the pgs out of 'incomplete'.
>>
>> I also see many ops blocked on the primary OSD for these:
>> 100 ops are blocked > 67108.9 sec
>> 100 ops are blocked > 67108.9 sec on osd.55
>>
>> However, this is a new disk. If I 'ceph osd out osd.55', the pgs move to
>> another OSD and the new primary gets blocked ops. Restarting osd.55 does
>> nothing. Other pgs on osd.55 seem okay.
>>
>> I would attach the result of a query, but if I run 'ceph pg 2.525 query',
>> the command totally hangs until I ctrl-c:
>>
>> ceph pg 2.525 query
>> ^CError EINTR: problem getting command descriptions from pg.2.525
>>
>> I've also tried 'ceph pg repair 2.525', which does nothing.
>>
>> Any thoughts here? Are my pools totally sunk?
>>
>> Thanks,
>> Lincoln
>> _______________________________________________
>> ceph-users mailing list
>> [email protected]
>> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
>
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com