On Fri, Jul 28, 2017 at 05:52:29PM +0800, linghucongsong wrote:
>
> You have two crush rules, one for ssd and the other for hdd?
yes, exactly..
>
> Can you show ceph osd dump | grep pool?
>
pool 3 'vm' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins
pg_num 1024 pgp_num 1024 last_change 69955 flags hashpspool
min_read_recency_for_promote 1 min_write_recency_for_promote 1 stripe_width 0
pool 4 'cephfs_data' replicated size 3 min_size 1 crush_ruleset 0 object_hash
rjenkins pg_num 1024 pgp_num 1024 last_change 74682 flags hashpspool
crash_replay_interval 45 min_write_recency_for_promote 1 stripe_width 0
pool 5 'cephfs_metadata' replicated size 3 min_size 1 crush_ruleset 0
object_hash rjenkins pg_num 1024 pgp_num 1024 last_change 74667 flags
hashpspool min_write_recency_for_promote 1 stripe_width 0
pool 11 'ssd' replicated size 3 min_size 1 crush_ruleset 1 object_hash rjenkins
pg_num 128 pgp_num 128 last_change 46119 flags hashpspool
min_write_recency_for_promote 1 stripe_width 0
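
fwiw, a single pool's ruleset can also be queried directly, e.g. (assuming
the jewel-era option name):

ceph osd pool get ssd crush_ruleset
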
> ceph osd crush dump
{
"devices": [
{
"id": 0,
"name": "osd.0"
},
{
"id": 1,
"name": "osd.1"
},
{
"id": 2,
"name": "osd.2"
},
{
"id": 3,
"name": "osd.3"
},
{
"id": 4,
"name": "osd.4"
},
{
"id": 5,
"name": "osd.5"
},
{
"id": 6,
"name": "osd.6"
},
{
"id": 7,
"name": "device7"
},
{
"id": 8,
"name": "osd.8"
},
{
"id": 9,
"name": "osd.9"
},
{
"id": 10,
"name": "osd.10"
},
{
"id": 11,
"name": "osd.11"
},
{
"id": 12,
"name": "osd.12"
},
{
"id": 13,
"name": "osd.13"
},
{
"id": 14,
"name": "osd.14"
},
{
"id": 15,
"name": "osd.15"
},
{
"id": 16,
"name": "osd.16"
},
{
"id": 17,
"name": "osd.17"
},
{
"id": 18,
"name": "osd.18"
},
{
"id": 19,
"name": "osd.19"
},
{
"id": 20,
"name": "osd.20"
},
{
"id": 21,
"name": "osd.21"
},
{
"id": 22,
"name": "osd.22"
},
{
"id": 23,
"name": "osd.23"
},
{
"id": 24,
"name": "osd.24"
},
{
"id": 25,
"name": "osd.25"
},
{
"id": 26,
"name": "osd.26"
}
],
"types": [
{
"type_id": 0,
"name": "osd"
},
{
"type_id": 1,
"name": "host"
},
{
"type_id": 2,
"name": "chassis"
},
{
"type_id": 3,
"name": "rack"
},
{
"type_id": 4,
"name": "row"
},
{
"type_id": 5,
"name": "pdu"
},
{
"type_id": 6,
"name": "pod"
},
{
"type_id": 7,
"name": "room"
},
{
"type_id": 8,
"name": "datacenter"
},
{
"type_id": 9,
"name": "region"
},
{
"type_id": 10,
"name": "root"
}
],
"buckets": [
{
"id": -1,
"name": "default",
"type_id": 10,
"type_name": "root",
"weight": 2575553,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -4,
"weight": 779875,
"pos": 0
},
{
"id": -5,
"weight": 681571,
"pos": 1
},
{
"id": -6,
"weight": 511178,
"pos": 2
},
{
"id": -3,
"weight": 602929,
"pos": 3
}
]
},
{
"id": -2,
"name": "ssd",
"type_id": 10,
"type_name": "root",
"weight": 102233,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -9,
"weight": 26214,
"pos": 0
},
{
"id": -10,
"weight": 39320,
"pos": 1
},
{
"id": -11,
"weight": 22282,
"pos": 2
},
{
"id": -7,
"weight": 14417,
"pos": 3
}
]
},
{
"id": -3,
"name": "v1d-sata",
"type_id": 1,
"type_name": "host",
"weight": 602929,
"alg": "straw",
"hash": "rjenkins1",
"items": [
{
"id": 12,
"weight": 91750,
"pos": 0
},
{
"id": 20,
"weight": 91750,
"pos": 1
},
{
"id": 21,
"weight": 235929,
"pos": 2
},
{
"id": 22,
"weight": 91750,
"pos": 3
},
{
"id": 23,
"weight": 91750,
"pos": 4
}
]
},
{
"id": -4,
"name": "v1a",
"type_id": 1,
"type_name": "host",
"weight": 779875,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": 6,
"weight": 104857,
"pos": 0
},
{
"id": 8,
"weight": 117964,
"pos": 1
},
{
"id": 2,
"weight": 104857,
"pos": 2
},
{
"id": 0,
"weight": 111411,
"pos": 3
},
{
"id": 4,
"weight": 104857,
"pos": 4
},
{
"id": 25,
"weight": 235929,
"pos": 5
}
]
},
{
"id": -5,
"name": "v1b",
"type_id": 1,
"type_name": "host",
"weight": 681571,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": 1,
"weight": 104857,
"pos": 0
},
{
"id": 3,
"weight": 117964,
"pos": 1
},
{
"id": 9,
"weight": 104857,
"pos": 2
},
{
"id": 11,
"weight": 117964,
"pos": 3
},
{
"id": 24,
"weight": 235929,
"pos": 4
}
]
},
{
"id": -6,
"name": "v1c",
"type_id": 1,
"type_name": "host",
"weight": 511178,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": 14,
"weight": 104857,
"pos": 0
},
{
"id": 15,
"weight": 117964,
"pos": 1
},
{
"id": 16,
"weight": 91750,
"pos": 2
},
{
"id": 18,
"weight": 91750,
"pos": 3
},
{
"id": 17,
"weight": 104857,
"pos": 4
}
]
},
{
"id": -7,
"name": "v1d-ssd",
"type_id": 1,
"type_name": "host",
"weight": 14417,
"alg": "straw",
"hash": "rjenkins1",
"items": [
{
"id": 19,
"weight": 14417,
"pos": 0
}
]
},
{
"id": -9,
"name": "v1c-ssd",
"type_id": 1,
"type_name": "host",
"weight": 26214,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": 10,
"weight": 26214,
"pos": 0
}
]
},
{
"id": -10,
"name": "v1a-ssd",
"type_id": 1,
"type_name": "host",
"weight": 39320,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": 5,
"weight": 19660,
"pos": 0
},
{
"id": 26,
"weight": 19660,
"pos": 1
}
]
},
{
"id": -11,
"name": "v1b-ssd",
"type_id": 1,
"type_name": "host",
"weight": 22282,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": 13,
"weight": 22282,
"pos": 0
}
]
}
],
"rules": [
{
"rule_id": 0,
"rule_name": "replicated_ruleset",
"ruleset": 0,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 1,
"rule_name": "ssd",
"ruleset": 1,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -2,
"item_name": "ssd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
}
],
"tunables": {
"choose_local_tries": 0,
"choose_local_fallback_tries": 0,
"choose_total_tries": 50,
"chooseleaf_descend_once": 1,
"chooseleaf_vary_r": 1,
"chooseleaf_stable": 0,
"straw_calc_version": 1,
"allowed_bucket_algs": 54,
"profile": "hammer",
"optimal_tunables": 0,
"legacy_tunables": 0,
"minimum_required_version": "hammer",
"require_feature_tunables": 1,
"require_feature_tunables2": 1,
"has_v2_rules": 0,
"require_feature_tunables3": 1,
"has_v3_rules": 0,
"has_v4_buckets": 1,
"require_feature_tunables5": 0,
"has_v5_rules": 0
}
}
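
btw I notice the map mixes bucket algorithms (v1d-sata and v1d-ssd are
still "straw", the rest "straw2"). to sanity-check that both rules can
actually place 3 replicas I can feed the map to crushtool, e.g. (the
/tmp path is arbitrary, and --rule 0 for the hdd rule accordingly):

ceph osd getcrushmap -o /tmp/crushmap
crushtool -i /tmp/crushmap --test --rule 1 --num-rep 3 --show-mappings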
>
> At 2017-07-28 17:47:48, "Nikola Ciprich" <[email protected]> wrote:
> >
> >On Fri, Jul 28, 2017 at 05:43:14PM +0800, linghucongsong wrote:
> >>
> >>
> >> It looks like the OSDs in your cluster are not all the same size.
> >>
> >> Can you show the ceph osd df output?
> >
> >you're right, they're not.. here's the output:
> >
> >[root@v1b ~]# ceph osd df tree
> >ID WEIGHT REWEIGHT SIZE USE AVAIL %USE VAR PGS TYPE NAME
> > -2 1.55995 - 1706G 883G 805G 51.78 2.55 0 root ssd
> > -9 0.39999 - 393G 221G 171G 56.30 2.78 0 host v1c-ssd
> > 10 0.39999 1.00000 393G 221G 171G 56.30 2.78 98 osd.10
> >-10 0.59998 - 683G 275G 389G 40.39 1.99 0 host v1a-ssd
> > 5 0.29999 1.00000 338G 151G 187G 44.77 2.21 65 osd.5
> > 26 0.29999 1.00000 344G 124G 202G 36.07 1.78 52 osd.26
> >-11 0.34000 - 338G 219G 119G 64.68 3.19 0 host v1b-ssd
> > 13 0.34000 1.00000 338G 219G 119G 64.68 3.19 96 osd.13
> > -7 0.21999 - 290G 166G 123G 57.43 2.83 0 host v1d-ssd
> > 19 0.21999 1.00000 290G 166G 123G 57.43 2.83 73 osd.19
> > -1 39.29982 - 43658G 8312G 34787G 19.04 0.94 0 root default
> > -4 11.89995 - 12806G 2422G 10197G 18.92 0.93 0 host v1a
> > 6 1.59999 1.00000 1833G 358G 1475G 19.53 0.96 366 osd.6
> > 8 1.79999 1.00000 1833G 313G 1519G 17.11 0.84 370 osd.8
> > 2 1.59999 1.00000 1833G 320G 1513G 17.46 0.86 331 osd.2
> > 0 1.70000 1.00000 1804G 431G 1373G 23.90 1.18 359 osd.0
> > 4 1.59999 1.00000 1833G 294G 1539G 16.07 0.79 360 osd.4
> > 25 3.59999 1.00000 3667G 704G 2776G 19.22 0.95 745 osd.25
> > -5 10.39995 - 10914G 2154G 8573G 19.74 0.97 0 host v1b
> > 1 1.59999 1.00000 1804G 350G 1454G 19.42 0.96 409 osd.1
> > 3 1.79999 1.00000 1804G 360G 1444G 19.98 0.99 412 osd.3
> > 9 1.59999 1.00000 1804G 331G 1473G 18.37 0.91 363 osd.9
> > 11 1.79999 1.00000 1833G 367G 1465G 20.06 0.99 415 osd.11
> > 24 3.59999 1.00000 3667G 744G 2736G 20.30 1.00 834 osd.24
> > -6 7.79996 - 9051G 1769G 7282G 19.54 0.96 0 host v1c
> > 14 1.59999 1.00000 1804G 370G 1433G 20.54 1.01 442 osd.14
> > 15 1.79999 1.00000 1833G 383G 1450G 20.92 1.03 447 osd.15
> > 16 1.39999 1.00000 1804G 295G 1508G 16.38 0.81 355 osd.16
> > 18 1.39999 1.00000 1804G 366G 1438G 20.29 1.00 381 osd.18
> > 17 1.59999 1.00000 1804G 353G 1451G 19.57 0.97 429 osd.17
> > -3 9.19997 - 10885G 1965G 8733G 18.06 0.89 0 host v1d-sata
> > 12 1.39999 1.00000 1804G 348G 1455G 19.32 0.95 365 osd.12
> > 20 1.39999 1.00000 1804G 335G 1468G 18.60 0.92 371 osd.20
> > 21 3.59999 1.00000 3667G 695G 2785G 18.97 0.94 871 osd.21
> > 22 1.39999 1.00000 1804G 281G 1522G 15.63 0.77 326 osd.22
> > 23 1.39999 1.00000 1804G 303G 1500G 16.83 0.83 321 osd.23
> > TOTAL 45365G 9195G 35592G 20.27
> >MIN/MAX VAR: 0.77/3.19 STDDEV: 14.69
> >
> >
> >
> >apart from replacing OSDs, what can I do about it?
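> >
> >I guess I could try nudging the distribution via weights, e.g. something
> >like (the weight value below is just an example):
> >
> >ceph osd crush reweight osd.21 3.2
> >
> >or ceph osd reweight-by-utilization, though I'm not sure that would
> >explain the stuck recovery itself..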
> >
> >>
> >>
> >> At 2017-07-28 17:24:29, "Nikola Ciprich" <[email protected]>
> >> wrote:
> >> >I forgot to add that the OSD daemons really seem to be idle: no disk
> >> >activity, no CPU usage.. it just looks to me like some kind of
> >> >deadlock, as if they were waiting for each other..
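> >> >
> >> >if it helps, I can check what a seemingly idle OSD is actually doing
> >> >via its admin socket on the node hosting it, e.g. (the osd id is just
> >> >an example):
> >> >
> >> >ceph daemon osd.13 dump_ops_in_flight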
> >> >
> >> >and so I've been trying to recover the last 1.5% of misplaced /
> >> >degraded PGs for almost a week now..
> >> >
> >> >
> >> >On Fri, Jul 28, 2017 at 10:56:02AM +0200, Nikola Ciprich wrote:
> >> >> Hi,
> >> >>
> >> >> I'm trying to find the reason for the strange recovery issues I'm
> >> >> seeing on our cluster..
> >> >>
> >> >> it's a mostly idle, 4-node cluster with 26 OSDs evenly distributed
> >> >> across the nodes, running jewel 10.2.9.
> >> >>
> >> >> the problem is that after some disk replacements and data moves,
> >> >> recovery is progressing extremely slowly.. PGs seem to be stuck in the
> >> >> active+recovering+degraded state:
> >> >>
> >> >> [root@v1d ~]# ceph -s
> >> >> cluster a5efbc87-3900-4c42-a977-8c93f7aa8c33
> >> >> health HEALTH_WARN
> >> >> 159 pgs backfill_wait
> >> >> 4 pgs backfilling
> >> >> 259 pgs degraded
> >> >> 12 pgs recovering
> >> >> 113 pgs recovery_wait
> >> >> 215 pgs stuck degraded
> >> >> 266 pgs stuck unclean
> >> >> 140 pgs stuck undersized
> >> >> 151 pgs undersized
> >> >> recovery 37788/2327775 objects degraded (1.623%)
> >> >> recovery 23854/2327775 objects misplaced (1.025%)
> >> >> noout,noin flag(s) set
> >> >> monmap e21: 3 mons at
> >> >> {v1a=10.0.0.1:6789/0,v1b=10.0.0.2:6789/0,v1c=10.0.0.3:6789/0}
> >> >> election epoch 6160, quorum 0,1,2 v1a,v1b,v1c
> >> >> fsmap e817: 1/1/1 up {0=v1a=up:active}, 1 up:standby
> >> >> osdmap e76002: 26 osds: 26 up, 26 in; 185 remapped pgs
> >> >> flags noout,noin,sortbitwise,require_jewel_osds
> >> >> pgmap v80995844: 3200 pgs, 4 pools, 2876 GB data, 757 kobjects
> >> >> 9215 GB used, 35572 GB / 45365 GB avail
> >> >> 37788/2327775 objects degraded (1.623%)
> >> >> 23854/2327775 objects misplaced (1.025%)
> >> >> 2912 active+clean
> >> >> 130 active+undersized+degraded+remapped+wait_backfill
> >> >> 97 active+recovery_wait+degraded
> >> >> 29 active+remapped+wait_backfill
> >> >> 12 active+recovery_wait+undersized+degraded+remapped
> >> >> 6 active+recovering+degraded
> >> >> 5 active+recovering+undersized+degraded+remapped
> >> >> 4 active+undersized+degraded+remapped+backfilling
> >> >> 4 active+recovery_wait+degraded+remapped
> >> >> 1 active+recovering+degraded+remapped
> >> >> client io 2026 B/s rd, 146 kB/s wr, 9 op/s rd, 21 op/s wr
> >> >>
> >> >>
> >> >> when I restart the affected OSDs, it bumps the recovery, but then other
> >> >> PGs get stuck.. all OSDs have been restarted multiple times, none are
> >> >> even close to nearfull, and I just can't find what I'm doing wrong..
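> >> >>
> >> >> to find the affected PGs I've been looking at something like this (the
> >> >> pg id below is just an example):
> >> >>
> >> >> ceph pg dump_stuck unclean
> >> >> ceph pg 3.1f query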
> >> >>
> >> >> possibly related OSD options:
> >> >>
> >> >> osd max backfills = 4
> >> >> osd recovery max active = 15
> >> >> debug osd = 0/0
> >> >> osd op threads = 4
> >> >> osd backfill scan min = 4
> >> >> osd backfill scan max = 16
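> >> >>
> >> >> (I believe these can also be adjusted at runtime with something like
> >> >> ceph tell osd.* injectargs '--osd-max-backfills 4 --osd-recovery-max-active 15'
> >> >> if that matters)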
> >> >>
> >> >> Any hints would be greatly appreciated
> >> >>
> >> >> thanks
> >> >>
> >> >> nik
> >> >>
> >> >>
> >
--
-------------------------------------
Ing. Nikola CIPRICH
LinuxBox.cz, s.r.o.
28.rijna 168, 709 00 Ostrava
tel.: +420 591 166 214
fax: +420 596 621 273
mobil: +420 777 093 799
www.linuxbox.cz
mobil servis: +420 737 238 656
email servis: [email protected]
-------------------------------------
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com