Just wanted to close this open loop: I gave up attempting to recover pool 4 as it was just test data, and the PGs with unfound objects were localized to that pool. After I destroyed and recreated the pool, things were fine.
Thank you for your help, Florian. ./JRH On Jun 3, 2014, at 6:30 PM, Jason Harley <[email protected]> wrote: > On Jun 3, 2014, at 5:58 PM, Smart Weblications GmbH - Florian Wiessner > <[email protected]> wrote: > >> I think it would be less painfull if you had removed and the immediatelly >> recreate the corrupted osd again to avoid 'holes' in the osd ids. It should >> work >> with your configuration anyhow, though. > > I agree with you… I learned about ‘lost’ after removing OSDs :\ > >> You should check other pg with ceph pg query and look out for >> >> "recovery_state": [ >> { "name": "Started\/Primary\/Active", >> "enter_time": "2014-06-03 18:27:58.473736", >> "might_have_unfound": [ >> { "osd": 2, >> "status": "already probed"}, >> { "osd": 3, >> "status": "already probed"}, >> { "osd": 12, >> "status": "osd is down"}, >> { "osd": 14, >> "status": "osd is down"}, >> { "osd": 19, >> "status": "osd is down"}, >> { "osd": 23, >> "status": "querying"}, >> { "osd": 26, >> "status": "already probed"}], >> >> >> And restart the osd that has status querying. > > Thank you, I will go through the other pgs and try this approach. > >> What do you get if you do ceph pg query 4.ff3 now? 
> > # ceph pg query 4.ff3 >> { "state": "active+clean", >> "epoch": 1650, >> "up": [ >> 23, >> 4], >> "acting": [ >> 23, >> 4], >> "info": { "pgid": "4.ff3", >> "last_update": "337'1080", >> "last_complete": "337'1080", >> "log_tail": "0'0", >> "last_backfill": "MAX", >> "purged_snaps": "[1~9]", >> "history": { "epoch_created": 3, >> "last_epoch_started": 1646, >> "last_epoch_clean": 1646, >> "last_epoch_split": 0, >> "same_up_since": 1645, >> "same_interval_since": 1645, >> "same_primary_since": 1645, >> "last_scrub": "337'1080", >> "last_scrub_stamp": "2014-06-03 16:19:28.591026", >> "last_deep_scrub": "337'32", >> "last_deep_scrub_stamp": "2014-05-29 20:28:58.517432", >> "last_clean_scrub_stamp": "2014-06-03 16:19:28.591026"}, >> "stats": { "version": "337'1080", >> "reported_seq": "1102", >> "reported_epoch": "1650", >> "state": "active+clean", >> "last_fresh": "2014-06-03 21:13:31.949714", >> "last_change": "2014-06-03 20:56:41.466837", >> "last_active": "2014-06-03 21:13:31.949714", >> "last_clean": "2014-06-03 21:13:31.949714", >> "last_became_active": "0.000000", >> "last_unstale": "2014-06-03 21:13:31.949714", >> "mapping_epoch": 1643, >> "log_start": "0'0", >> "ondisk_log_start": "0'0", >> "created": 3, >> "last_epoch_clean": 1646, >> "parent": "0.0", >> "parent_split_bits": 0, >> "last_scrub": "337'1080", >> "last_scrub_stamp": "2014-06-03 16:19:28.591026", >> "last_deep_scrub": "337'32", >> "last_deep_scrub_stamp": "2014-05-29 20:28:58.517432", >> "last_clean_scrub_stamp": "2014-06-03 16:19:28.591026", >> "log_size": 1080, >> "ondisk_log_size": 1080, >> "stats_invalid": "0", >> "stat_sum": { "num_bytes": 25165824, >> "num_objects": 3, >> "num_object_clones": 0, >> "num_object_copies": 0, >> "num_objects_missing_on_primary": 0, >> "num_objects_degraded": 0, >> "num_objects_unfound": 0, >> "num_read": 3205, >> "num_read_kb": 12615, >> "num_write": 1086, >> "num_write_kb": 88685, >> "num_scrub_errors": 0, >> "num_shallow_scrub_errors": 0, >> 
"num_deep_scrub_errors": 0, >> "num_objects_recovered": 9, >> "num_bytes_recovered": 75497472, >> "num_keys_recovered": 0}, >> "stat_cat_sum": {}, >> "up": [ >> 23, >> 4], >> "acting": [ >> 23, >> 4]}, >> "empty": 0, >> "dne": 0, >> "incomplete": 0, >> "last_epoch_started": 1646}, >> "recovery_state": [ >> { "name": "Started\/Primary\/Active", >> "enter_time": "2014-06-03 20:56:41.232146", >> "might_have_unfound": [], >> "recovery_progress": { "backfill_target": -1, >> "waiting_on_backfill": 0, >> "backfill_pos": "0\/\/0\/\/-1", >> "backfill_info": { "begin": "0\/\/0\/\/-1", >> "end": "0\/\/0\/\/-1", >> "objects": []}, >> "peer_backfill_info": { "begin": "0\/\/0\/\/-1", >> "end": "0\/\/0\/\/-1", >> "objects": []}, >> "backfills_in_flight": [], >> "pull_from_peer": [], >> "pushing": []}, >> "scrub": { "scrubber.epoch_start": "0", >> "scrubber.active": 0, >> "scrubber.block_writes": 0, >> "scrubber.finalizing": 0, >> "scrubber.waiting_on": 0, >> "scrubber.waiting_on_whom": []}}, >> { "name": "Started", >> "enter_time": "2014-06-03 20:56:40.300108"}]} > > Thank you for your help so far. I will respond with progress tomorrow. > > ./JRH >
_______________________________________________ ceph-users mailing list [email protected] http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
