It looks like your OSDs all have weight zero for some reason. I'd fix that.
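For example, a quick sketch of the usual fix (with ~5 GB virtual disks the automatically assigned CRUSH weight, which is the disk size in TB, probably rounded down to 0, so no PGs can be mapped to the OSDs; the 0.05 below is just an illustrative non-zero relative value):

for i in 0 1 2 3 4 5; do ceph osd crush reweight osd.$i 0.05; done
ceph osd tree    # weights should now be non-zero
ceph -s          # PGs should peer and go active+clean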
:)
-Greg
On Tue, Dec 9, 2014 at 6:24 AM Giuseppe Civitella <
[email protected]> wrote:
> Hi,
>
> thanks for the quick answer.
> I did try force_create_pg on a PG, but it is stuck in "creating":
> root@ceph-mon1:/home/ceph# ceph pg dump |grep creating
> dumped all in format plain
> 2.2f 0 0 0 0 0 0 0 creating 2014-12-09 13:11:37.384808 0'0 0:0 [] -1 [] -1 0'0 0.000000 0'0 0.000000
>
> root@ceph-mon1:/home/ceph# ceph pg 2.2f query
> { "state": "active+degraded",
> "epoch": 105,
> "up": [
> 0],
> "acting": [
> 0],
> "actingbackfill": [
> "0"],
> "info": { "pgid": "2.2f",
> "last_update": "0'0",
> "last_complete": "0'0",
> "log_tail": "0'0",
> "last_user_version": 0,
> "last_backfill": "MAX",
> "purged_snaps": "[]",
> "last_scrub": "0'0",
> "last_scrub_stamp": "2014-12-06 14:15:11.499769",
> "last_deep_scrub": "0'0",
> "last_deep_scrub_stamp": "2014-12-06 14:15:11.499769",
> "last_clean_scrub_stamp": "0.000000",
> "log_size": 0,
> "ondisk_log_size": 0,
> "stats_invalid": "0",
> "stat_sum": { "num_bytes": 0,
> "num_objects": 0,
> "num_object_clones": 0,
> "num_object_copies": 0,
> "num_objects_missing_on_primary": 0,
> "num_objects_degraded": 0,
> "num_objects_unfound": 0,
> "num_objects_dirty": 0,
> "num_whiteouts": 0,
> "num_read": 0,
> "num_read_kb": 0,
> "num_write": 0,
> "num_write_kb": 0,
> "num_scrub_errors": 0,
> "num_shallow_scrub_errors": 0,
> "num_deep_scrub_errors": 0,
> "num_objects_recovered": 0,
> "num_bytes_recovered": 0,
> "num_keys_recovered": 0,
> "num_objects_omap": 0,
> "num_objects_hit_set_archive": 0},
> "stat_cat_sum": {},
> "up": [
> 0],
> "acting": [
> 0],
> "up_primary": 0,
> "acting_primary": 0},
> "empty": 1,
> "dne": 0,
> "incomplete": 0,
> "last_epoch_started": 104,
> "hit_set_history": { "current_last_update": "0'0",
> "current_last_stamp": "0.000000",
> "current_info": { "begin": "0.000000",
> "end": "0.000000",
> "version": "0'0"},
> "history": []}},
> "peer_info": [],
> "recovery_state": [
> { "name": "Started\/Primary\/Active",
> "enter_time": "2014-12-09 12:12:52.760384",
> "might_have_unfound": [],
> "recovery_progress": { "backfill_targets": [],
> "waiting_on_backfill": [],
> "last_backfill_started": "0\/\/0\/\/-1",
> "backfill_info": { "begin": "0\/\/0\/\/-1",
> "end": "0\/\/0\/\/-1",
> "objects": []},
> "peer_backfill_info": [],
> "backfills_in_flight": [],
> "recovering": [],
> "pg_backend": { "pull_from_peer": [],
> "pushing": []}},
> "scrub": { "scrubber.epoch_start": "0",
> "scrubber.active": 0,
> "scrubber.block_writes": 0,
> "scrubber.finalizing": 0,
> "scrubber.waiting_on": 0,
> "scrubber.waiting_on_whom": []}},
> { "name": "Started",
> "enter_time": "2014-12-09 12:12:51.845686"}],
> "agent_state": {}}root@ceph-mon1:/home/ceph#
>
>
>
> 2014-12-09 13:01 GMT+01:00 Irek Fasikhov <[email protected]>:
>
>> Hi.
>>
>> http://ceph.com/docs/master/rados/troubleshooting/troubleshooting-pg/
>>
>> ceph pg force_create_pg <pgid>
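>>
>> For instance, using the stuck pgid 2.2f from this thread as an example (substitute your own), you could force-create it and then watch its state:
>>
>> ceph pg force_create_pg 2.2f
>> ceph pg dump | grep creating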
>>
>>
>> 2014-12-09 14:50 GMT+03:00 Giuseppe Civitella <
>> [email protected]>:
>>
>>> Hi all,
>>>
>>> last week I installed a new Ceph cluster on 3 VMs running Ubuntu 14.04
>>> with the default kernel.
>>> There is a Ceph monitor and two OSD hosts. Here are some details:
>>> ceph -s
>>> cluster c46d5b02-dab1-40bf-8a3d-f8e4a77b79da
>>> health HEALTH_WARN 192 pgs degraded; 192 pgs stuck unclean
>>> monmap e1: 1 mons at {ceph-mon1=10.1.1.83:6789/0}, election epoch
>>> 1, quorum 0 ceph-mon1
>>> osdmap e83: 6 osds: 6 up, 6 in
>>> pgmap v231: 192 pgs, 3 pools, 0 bytes data, 0 objects
>>> 207 MB used, 30446 MB / 30653 MB avail
>>> 192 active+degraded
>>>
>>> root@ceph-mon1:/home/ceph# ceph osd dump
>>> epoch 99
>>> fsid c46d5b02-dab1-40bf-8a3d-f8e4a77b79da
>>> created 2014-12-06 13:15:06.418843
>>> modified 2014-12-09 11:38:04.353279
>>> flags
>>> pool 0 'data' replicated size 2 min_size 1 crush_ruleset 0 object_hash
>>> rjenkins pg_num 64 pgp_num 64 last_change 18 flags hashpspool
>>> crash_replay_interval 45 stripe_width 0
>>> pool 1 'metadata' replicated size 2 min_size 1 crush_ruleset 0
>>> object_hash rjenkins pg_num 64 pgp_num 64 last_change 19 flags hashpspool
>>> stripe_width 0
>>> pool 2 'rbd' replicated size 2 min_size 1 crush_ruleset 0 object_hash
>>> rjenkins pg_num 64 pgp_num 64 last_change 20 flags hashpspool stripe_width 0
>>> max_osd 6
>>> osd.0 up in weight 1 up_from 90 up_thru 90 down_at 89
>>> last_clean_interval [58,89) 10.1.1.84:6805/995 10.1.1.84:6806/4000995
>>> 10.1.1.84:6807/4000995 10.1.1.84:6808/4000995 exists,up
>>> e3895075-614d-48e2-b956-96e13dbd87fe
>>> osd.1 up in weight 1 up_from 88 up_thru 0 down_at 87
>>> last_clean_interval [8,87) 10.1.1.85:6800/23146 10.1.1.85:6815/7023146
>>> 10.1.1.85:6816/7023146 10.1.1.85:6817/7023146 exists,up
>>> 144bc6ee-2e3d-4118-a460-8cc2bb3ec3e8
>>> osd.2 up in weight 1 up_from 61 up_thru 0 down_at 60
>>> last_clean_interval [11,60) 10.1.1.85:6805/26784 10.1.1.85:6802/5026784
>>> 10.1.1.85:6811/5026784 10.1.1.85:6812/5026784 exists,up
>>> 8d5c7108-ef11-4947-b28c-8e20371d6d78
>>> osd.3 up in weight 1 up_from 95 up_thru 0 down_at 94
>>> last_clean_interval [57,94) 10.1.1.84:6800/810 10.1.1.84:6810/3000810
>>> 10.1.1.84:6811/3000810 10.1.1.84:6812/3000810 exists,up
>>> bd762b2d-f94c-4879-8865-cecd63895557
>>> osd.4 up in weight 1 up_from 97 up_thru 0 down_at 96
>>> last_clean_interval [74,96) 10.1.1.84:6801/9304 10.1.1.84:6802/2009304
>>> 10.1.1.84:6803/2009304 10.1.1.84:6813/2009304 exists,up
>>> 7d28a54b-b474-4369-b958-9e6bf6c856aa
>>> osd.5 up in weight 1 up_from 99 up_thru 0 down_at 98
>>> last_clean_interval [79,98) 10.1.1.85:6801/19513 10.1.1.85:6808/2019513
>>> 10.1.1.85:6810/2019513 10.1.1.85:6813/2019513 exists,up
>>> f4d76875-0e40-487c-a26d-320f8b8d60c5
>>>
>>> root@ceph-mon1:/home/ceph# ceph osd tree
>>> # id weight type name up/down reweight
>>> -1 0 root default
>>> -2 0 host ceph-osd1
>>> 0 0 osd.0 up 1
>>> 3 0 osd.3 up 1
>>> 4 0 osd.4 up 1
>>> -3 0 host ceph-osd2
>>> 1 0 osd.1 up 1
>>> 2 0 osd.2 up 1
>>> 5 0 osd.5 up 1
>>>
>>> The current HEALTH_WARN state has shown "192 active+degraded" since I rebooted an
>>> OSD host. Previously it was "incomplete". The cluster has never reached a HEALTH_OK
>>> state.
>>> Any hint about what to do next to get a healthy cluster?
>>>
>>>
>>>
>>>
>>
>>
>> --
>> Best regards, Irek Fasikhov
>> Mobile: +79229045757
>>
>
>
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com