Hi,
How old is this cluster? I ask because this might be a CRUSH tunables issue,
which is where this kind of problem pops up.
You can try the following (it might move a lot of data!):
$ ceph osd getcrushmap -o crushmap.backup
$ ceph osd crush tunables optimal
If things go wrong you always have the old CRUSHmap:
$ ceph osd setcrushmap -i crushmap.backup
0.94.10 is EOL as well, so I would consider upgrading once this PG becomes
active+clean.
Wido
On 12/3/18 2:51 PM, Athanasios Panterlis wrote:
> Hi all,
>
> I am managing a typical small Ceph cluster that consists of 4 nodes,
> each with 7 OSDs (some in the hdd pool, some in the ssd pool).
>
> The cluster was healthy, but following some space issues due to poor PG
> distribution by Ceph, I tried reweighting some specific OSDs.
> Unfortunately, during the rebalancing after the reweight, a specific PG went
> to the active+remapped state, and I now have a couple of misplaced objects
> (3359/6118678 objects misplaced (0.055%)).
>
> The cluster had (for IOPS reasons) the scrub and deep-scrub options
> disabled. I enabled a simple scrub on all PGs and waited for it to run. It
> finished completely, without problems as far as I can tell. I would like to
> avoid deep-scrub, but if it is going to help I will run it once.
>
> I am thinking of declaring OSD 26 as lost. This way, a new copy of the PG
> will be created from OSD 6, correct?
> Are there any other, less harmful ideas on how to fix it?
>
> I attached all the information I could provide. I am also pasting it
> raw below:
>
> *Query for faulty PG:*
>
> {
> "state": "active+remapped",
> "snap_trimq": "[]",
> "epoch": 11755,
> "up": [
> 6
> ],
> "acting": [
> 6,
> 26
> ],
> "actingbackfill": [
> "6",
> "26"
> ],
> "info": {
> "pgid": "1.11d",
> "last_update": "11755'60561210",
> "last_complete": "11755'60561210",
> "log_tail": "11755'60558123",
> "last_user_version": 60561210,
> "last_backfill": "MAX",
> "purged_snaps": "[1~33,36~22]",
> "history": {
> "epoch_created": 31,
> "last_epoch_started": 11681,
> "last_epoch_clean": 11681,
> "last_epoch_split": 0,
> "same_up_since": 11679,
> "same_interval_since": 11680,
> "same_primary_since": 11510,
> "last_scrub": "449'16483",
> "last_scrub_stamp": "2016-09-14 15:25:14.228231",
> "last_deep_scrub": "448'16277",
> "last_deep_scrub_stamp": "2016-09-13 06:11:45.633007",
> "last_clean_scrub_stamp": "2016-09-14 15:25:14.228231"
> },
> "stats": {
> "version": "11755'60561210",
> "reported_seq": "50924585",
> "reported_epoch": "11755",
> "state": "active+remapped",
> "last_fresh": "2018-12-03 12:58:03.289251",
> "last_change": "2018-11-09 10:54:06.861873",
> "last_active": "2018-12-03 12:58:03.289251",
> "last_peered": "2018-12-03 12:58:03.289251",
> "last_clean": "2018-11-09 10:54:02.622866",
> "last_became_active": "0.000000",
> "last_became_peered": "0.000000",
> "last_unstale": "2018-12-03 12:58:03.289251",
> "last_undegraded": "2018-12-03 12:58:03.289251",
> "last_fullsized": "2018-12-03 12:58:03.289251",
> "mapping_epoch": 11679,
> "log_start": "11755'60558123",
> "ondisk_log_start": "11755'60558123",
> "created": 31,
> "last_epoch_clean": 11681,
> "parent": "0.0",
> "parent_split_bits": 0,
> "last_scrub": "449'16483",
> "last_scrub_stamp": "2016-09-14 15:25:14.228231",
> "last_deep_scrub": "448'16277",
> "last_deep_scrub_stamp": "2016-09-13 06:11:45.633007",
> "last_clean_scrub_stamp": "2016-09-14 15:25:14.228231",
> "log_size": 3087,
> "ondisk_log_size": 3087,
> "stats_invalid": "0",
> "stat_sum": {
> "num_bytes": 14031434258,
> "num_objects": 3359,
> "num_object_clones": 0,
> "num_object_copies": 6718,
> "num_objects_missing_on_primary": 0,
> "num_objects_degraded": 0,
> "num_objects_misplaced": 3359,
> "num_objects_unfound": 0,
> "num_objects_dirty": 3359,
> "num_whiteouts": 0,
> "num_read": 27359423,
> "num_read_kb": 1815932413,
> "num_write": 121113356,
> "num_write_kb": 2124776643,
> "num_scrub_errors": 0,
> "num_shallow_scrub_errors": 0,
> "num_deep_scrub_errors": 0,
> "num_objects_recovered": 65218,
> "num_bytes_recovered": 271765903872,
> "num_keys_recovered": 0,
> "num_objects_omap": 0,
> "num_objects_hit_set_archive": 0,
> "num_bytes_hit_set_archive": 0
> },
> "up": [
> 6
> ],
> "acting": [
> 6,
> 26
> ],
> "blocked_by": [],
> "up_primary": 6,
> "acting_primary": 6
> },
> "empty": 0,
> "dne": 0,
> "incomplete": 0,
> "last_epoch_started": 11681,
> "hit_set_history": {
> "current_last_update": "0'0",
> "current_last_stamp": "0.000000",
> "current_info": {
> "begin": "0.000000",
> "end": "0.000000",
> "version": "0'0",
> "using_gmt": "1"
> },
> "history": []
> }
> },
> "peer_info": [
> {
> "peer": "26",
> "pgid": "1.11d",
> "last_update": "11755'60561210",
> "last_complete": "11755'60561210",
> "log_tail": "11649'58446601",
> "last_user_version": 58449647,
> "last_backfill": "MAX",
> "purged_snaps": "[1~33,36~22]",
> "history": {
> "epoch_created": 31,
> "last_epoch_started": 11681,
> "last_epoch_clean": 11681,
> "last_epoch_split": 0,
> "same_up_since": 11679,
> "same_interval_since": 11680,
> "same_primary_since": 11510,
> "last_scrub": "449'16483",
> "last_scrub_stamp": "2016-09-14 15:25:14.228231",
> "last_deep_scrub": "448'16277",
> "last_deep_scrub_stamp": "2016-09-13 06:11:45.633007",
> "last_clean_scrub_stamp": "2016-09-14 15:25:14.228231"
> },
> "stats": {
> "version": "11678'58449646",
> "reported_seq": "48950066",
> "reported_epoch": "11678",
> "state": "active+clean",
> "last_fresh": "2018-11-09 10:54:02.263168",
> "last_change": "2018-11-09 08:01:12.116827",
> "last_active": "2018-11-09 10:54:02.263168",
> "last_peered": "2018-11-09 10:54:02.263168",
> "last_clean": "2018-11-09 10:54:02.263168",
> "last_became_active": "0.000000",
> "last_became_peered": "0.000000",
> "last_unstale": "2018-11-09 10:54:02.263168",
> "last_undegraded": "2018-11-09 10:54:02.263168",
> "last_fullsized": "2018-11-09 10:54:02.263168",
> "mapping_epoch": 11679,
> "log_start": "11649'58446601",
> "ondisk_log_start": "11649'58446601",
> "created": 31,
> "last_epoch_clean": 11610,
> "parent": "0.0",
> "parent_split_bits": 0,
> "last_scrub": "449'16483",
> "last_scrub_stamp": "2016-09-14 15:25:14.228231",
> "last_deep_scrub": "448'16277",
> "last_deep_scrub_stamp": "2016-09-13 06:11:45.633007",
> "last_clean_scrub_stamp": "2016-09-14 15:25:14.228231",
> "log_size": 3045,
> "ondisk_log_size": 3045,
> "stats_invalid": "0",
> "stat_sum": {
> "num_bytes": 18153595392,
> "num_objects": 4344,
> "num_object_clones": 0,
> "num_object_copies": 8688,
> "num_objects_missing_on_primary": 0,
> "num_objects_degraded": 0,
> "num_objects_misplaced": 0,
> "num_objects_unfound": 0,
> "num_objects_dirty": 4344,
> "num_whiteouts": 0,
> "num_read": 26674601,
> "num_read_kb": 1767105243,
> "num_write": 116892449,
> "num_write_kb": 2073693377,
> "num_scrub_errors": 0,
> "num_shallow_scrub_errors": 0,
> "num_deep_scrub_errors": 0,
> "num_objects_recovered": 65218,
> "num_bytes_recovered": 271765903872,
> "num_keys_recovered": 0,
> "num_objects_omap": 0,
> "num_objects_hit_set_archive": 0,
> "num_bytes_hit_set_archive": 0
> },
> "up": [
> 6
> ],
> "acting": [
> 6,
> 26
> ],
> "blocked_by": [],
> "up_primary": 6,
> "acting_primary": 6
> },
> "empty": 0,
> "dne": 0,
> "incomplete": 0,
> "last_epoch_started": 11681,
> "hit_set_history": {
> "current_last_update": "0'0",
> "current_last_stamp": "0.000000",
> "current_info": {
> "begin": "0.000000",
> "end": "0.000000",
> "version": "0'0",
> "using_gmt": "1"
> },
> "history": []
> }
> }
> ],
> "recovery_state": [
> {
> "name": "Started\/Primary\/Active",
> "enter_time": "2018-11-09 10:54:06.825830",
> "might_have_unfound": [],
> "recovery_progress": {
> "backfill_targets": [],
> "waiting_on_backfill": [],
> "last_backfill_started": "-1\/0\/\/0",
> "backfill_info": {
> "begin": "-1\/0\/\/0",
> "end": "-1\/0\/\/0",
> "objects": []
> },
> "peer_backfill_info": [],
> "backfills_in_flight": [],
> "recovering": [],
> "pg_backend": {
> "pull_from_peer": [],
> "pushing": []
> }
> },
> "scrub": {
> "scrubber.epoch_start": "0",
> "scrubber.active": 0,
> "scrubber.waiting_on": 0,
> "scrubber.waiting_on_whom": []
> }
> },
> {
> "name": "Started",
> "enter_time": "2018-11-09 10:54:05.789621"
> }
> ],
> "agent_state": {}
> }
>
> *Ceph status*
>
> health HEALTH_WARN
> 1 pgs stuck unclean
> recovery 3359/6118678 objects misplaced (0.055%)
> noout,nodeep-scrub flag(s) set
> monmap e3: 3 mons at
> {0=192.168.1.1:6789/0,1=192.168.1.2:6789/0,2=192.168.1.3:6789/0}
> election epoch 4882, quorum 0,1,2 0,1,2
> osdmap e11755: 27 osds: 27 up, 27 in; 1 remapped pgs
> flags noout,nodeep-scrub
> pgmap v62734988: 1024 pgs, 2 pools, 10183 GB data, 2557 kobjects
> 23768 GB used, 48720 GB / 72488 GB avail
> 3359/6118678 objects misplaced (0.055%)
> 1023 active+clean
> 1 active+remapped
> client io 141 kB/s rd, 14068 kB/s wr, 925 op/s
>
> 2018-12-03 12:58:52.109913 mon.0 [INF] pgmap v62734987: 1024 pgs: 1
> active+remapped, 1023 active+clean; 10183 GB data, 23768 GB used, 48720
> GB / 72488 GB avail; 8325 kB/s rd, 16182 kB/s wr, 1704 op/s;
> 3359/6118678 objects misplaced (0.055%)
>
> *OSD tree*
>
> ID WEIGHT REWEIGHT SIZE USE AVAIL %USE VAR TYPE NAME
>
> -11 37.19995 - 37204G 10257G 26946G 27.57 0.84 root hdd
>
> -12 9.29999 - 9301G 2531G 6769G 27.22 0.83 host hdd-node1
> 18 4.64999 1.00000 4650G 1226G 3424G 26.37 0.80 osd.18
>
> 19 4.64999 1.00000 4650G 1305G 3345G 28.06 0.86 osd.19
>
> -13 9.29999 - 9301G 2665G 6635G 28.66 0.87 host hdd-node2
> 20 4.64999 1.00000 4650G 1361G 3289G 29.27 0.89 osd.20
>
> 21 4.64999 1.00000 4650G 1304G 3346G 28.05 0.86 osd.21
>
> -14 9.29999 - 9301G 2628G 6672G 28.26 0.86 host hdd-node3
> 22 4.64999 1.00000 4650G 1396G 3254G 30.02 0.92 osd.22
>
> 23 4.64999 1.00000 4650G 1232G 3418G 26.50 0.81 osd.23
>
> -15 9.29999 - 9301G 2431G 6869G 26.15 0.80 host hdd-node4
> 24 4.64999 1.00000 4650G 1218G 3432G 26.20 0.80 osd.24
>
> 25 4.64999 1.00000 4650G 1213G 3436G 26.09 0.80 osd.25
>
> -1 35.14995 - 35284G 13512G 21771G 38.30 1.17 root default
>
> -2 9.25000 - 9285G 3431G 5853G 36.96 1.13 host node1
> 0 1.84999 1.00000 1857G 765G 1091G 41.24 1.26 osd.0
>
> 1 1.84999 1.00000 1857G 633G 1224G 34.09 1.04 osd.1
>
> 6 1.84999 1.00000 1857G 777G 1079G 41.88 1.28 osd.6
>
> 7 1.84999 0.89999 1857G 752G 1104G 40.54 1.24 osd.7
>
> 8 1.84999 1.00000 1857G 502G 1354G 27.06 0.83 osd.8
>
> -3 9.24995 - 9285G 3562G 5722G 38.37 1.17 host node2
> 2 1.84999 1.00000 1857G 766G 1090G 41.27 1.26 osd.2
>
> 3 1.84999 0.70000 1857G 674G 1182G 36.33 1.11 osd.3
>
> 9 1.84999 1.00000 1857G 580G 1276G 31.28 0.95 osd.9
>
> 10 1.84999 1.00000 1857G 814G 1042G 43.88 1.34 osd.10
>
> 11 1.84999 1.00000 1857G 725G 1131G 39.07 1.19 osd.11
>
> -4 9.25000 - 9285G 3561G 5724G 38.35 1.17 host node3
> 4 1.84999 1.00000 1857G 684G 1172G 36.88 1.12 osd.4
>
> 5 1.84999 1.00000 1857G 633G 1223G 34.11 1.04 osd.5
>
> 12 1.84999 1.00000 1857G 696G 1160G 37.49 1.14 osd.12
>
> 13 1.84999 0.70000 1857G 741G 1116G 39.90 1.22 osd.13
>
> 14 1.84999 0.89999 1857G 805G 1051G 43.37 1.32 osd.14
>
> -5 7.39999 - 7428G 2957G 4470G 39.81 1.21 host node4
> 15 1.84999 0.79999 1857G 742G 1115G 39.96 1.22 osd.15
>
> 16 1.84999 1.00000 1857G 634G 1222G 34.15 1.04 osd.16
>
> 17 1.84999 0.89999 1857G 803G 1053G 43.26 1.32 osd.17
>
> 26 1.84999 0.81000 1857G 777G 1079G 41.89 1.28 osd.26
>
> TOTAL 72488G 23770G 48718G 32.79
>
> MIN/MAX VAR: 0.80/1.34 STDDEV: 6.56
>
> *PG dump*
>
> version 62735224
> stamp 2018-12-03 13:02:52.799643
> last_osdmap_epoch 11755
> last_pg_scan 9537
> full_ratio 0.95
> nearfull_ratio 0.85
> pg_stat objects mip degr misp unf bytes log disklog state state_stamp v
> reported up up_primary acting acting_primary last_scrub scrub_stamp
> last_deep_scrub deep_scrub_stamp
>
> ///active+clean ones removed///
>
> 1.11d 3359 0 0 3359 0 14031434258 3034 3034 active+remapped 2018-11-09
> 10:54:06.861873 11755'60561357 11755:50924695 [6] 6 [6,26] 6 449'16483
> 2016-09-14 15:25:14.228231 448'16277 2016-09-13 06:11:45.633007
>
> ///active+clean ones removed///
>
> pool 1 1738101 0 0 3359 0 7253679601802 1562466 1562466
> pool 2 881071 0 0 0 0 3682217717410 1561924 1561924
> sum 2619172 0 0 3359 0 10935897319212 3124390 3124390
> osdstat kbused kbavail kb hb in hb out
> 0 803034868 1144251736 1947286604 [1,2,3,4,5,10,12,13,14,15,16,26] []
> 1 663754736 1283531868 1947286604 [0,2,3,5,10,12,13,15,16,17,26] []
> 2 803619260 1143667344 1947286604 [0,1,3,4,5,6,7,8,13,14,15,16,17,26] []
> 3 707438640 1239847964 1947286604 [0,1,2,4,5,6,7,8,12,13,14,15,17] []
> 4 718194072 1229092532 1947286604 [2,3,5,6,8,10,11,15,16,17,26] []
> 5 664279112 1283007492 1947286604 [0,1,2,3,4,6,8,10,11,15,17,26] []
> 6 815455088 1131831516 1947286604 [2,4,5,7,9,10,11,12,13,14,15,16,17,26] []
> 7 789396940 1157889664 1947286604 [2,4,6,8,9,10,11,12,13,14,15,16,17,26] []
> 8 526871252 1420415352 1947286604 [2,3,4,5,7,9,10,11,12,13,15,16,17] []
> 9 609147992 1338138612 1947286604 [0,1,4,7,8,10,13,14,15,16,17,26] []
> 10 854451916 1092834688 1947286604
> [0,1,4,5,7,8,9,11,12,13,14,15,16,17,26] []
> 11 760893328 1186393276 1947286604 [1,4,5,6,7,8,10,12,13,14,15,16,17,26] []
> 12 730109256 1217177348 1947286604 [0,6,7,8,9,10,11,13,15,16,17,26] []
> 13 777029008 1170257596 1947286604 [0,1,2,3,6,7,8,9,10,11,12,14,15,16] []
> 14 844469760 1102816844 1947286604 [0,1,2,3,6,7,9,10,11,13,15,16,17,26] []
> 15 778122444 1169164160 1947286604 [0,2,3,4,6,9,10,11,14,16] []
> 16 664960388 1282326216 1947286604 [1,2,3,4,5,6,7,8,10,11,12,13,15,17] []
> 17 842428012 1104858592 1947286604 [0,1,2,3,4,5,8,9,10,11,12,13,14,16,18] []
> 18 1285869748 3590537232 4876406980 [0,1,17,20,21,22,23,24,25,26] []
> 19 1368764192 3507642788 4876406980 [0,1,18,20,21,22,23,24,25,26] []
> 20 1427417120 3448989860 4876406980 [4,17,18,19,21,22,23,24,25,26] []
> 21 1367928664 3508478316 4876406980 [4,5,18,19,20,22,23,24,25,26] []
> 22 1464361956 3412045024 4876406980 [4,5,18,19,20,21,23,24,25,26] []
> 23 1292415092 3583991888 4876406980 [4,5,18,19,20,21,22,24,25,26] []
> 24 1277731204 3598675776 4876406980 [4,5,18,19,20,21,22,23,25,26] []
> 25 1272703828 3603703152 4876406980 [4,5,18,19,20,21,22,23,24,26] []
> 26 815682292 1131604312 1947286604 [0,1,2,3,4,5,6,7,8,10,11,12,13,25] []
>
> *Ceph health detail*
>
> HEALTH_WARN 1 pgs stuck unclean; recovery 3359/6120420 objects misplaced
> (0.055%); noout,nodeep-scrub flag(s) set
> pg 1.11d is stuck unclean for 2081576.511195, current state
> active+remapped, last acting [6,26]
> recovery 3359/6120420 objects misplaced (0.055%)
> noout,nodeep-scrub flag(s) set
>
> *Ceph version*
>
> ceph version 0.94.10 (b1e0532418e4631af01acbc0cedd426f1905f4af)
>
> Regards,
> Nasos Pan
>
> _______________________________________________
> ceph-users mailing list
> [email protected]
> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
>
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com