Hello,

We upgraded to Quincy and tried to remove an obsolete part of our CRUSH setup:

In the early days of Ceph there were no device classes, so we created rules to split the OSDs into hdd and ssd in one of our datacenters, following this approach:

https://www.sebastien-han.fr/blog/2014/08/25/ceph-mix-sata-and-ssd-within-the-same-box/

So we had separate "roots" for SSD and HDD. Two weeks ago we moved the hosts to root=default (a sketch of the commands follows after the tree below) and checked what happened: nothing, everything was fine and kept working. But we did not check "ceph df":

==========
root@fc-r02-ceph-osd-01:[~]: ceph osd df tree
ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME
-14 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - root sata
-18 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - datacenter fc-sata
-16 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - rack r02-sata
-13 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - root ssds
-17 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - datacenter fc-ssds
-15 0 - 0 B 0 B 0 B 0 B 0 B 0 B 0 0 - rack r02-ssds
-1 23.99060 - 23 TiB 13 TiB 12 TiB 6.0 GiB 32 GiB 11 TiB 54.17 1.00 - root default
-6 4.00145 - 3.9 TiB 2.1 TiB 2.1 TiB 2.1 MiB 7.2 GiB 1.7 TiB 54.87 1.01 - host fc-r02-ceph-osd-01
0 ssd 0.45470 1.00000 447 GiB 236 GiB 235 GiB 236 KiB 794 MiB 211 GiB 52.80 0.97 119 up osd.0
1 ssd 0.45470 1.00000 447 GiB 222 GiB 221 GiB 239 KiB 808 MiB 225 GiB 49.67 0.92 108 up osd.1
2 ssd 0.45470 1.00000 447 GiB 245 GiB 244 GiB 254 KiB 819 MiB 202 GiB 54.85 1.01 118 up osd.2
3 ssd 0.45470 1.00000 447 GiB 276 GiB 276 GiB 288 KiB 903 MiB 171 GiB 61.83 1.14 135 up osd.3
4 ssd 0.45470 1.00000 447 GiB 268 GiB 267 GiB 272 KiB 913 MiB 180 GiB 59.85 1.10 132 up osd.4
5 ssd 0.45470 1.00000 447 GiB 204 GiB 203 GiB 181 KiB 684 MiB 243 GiB 45.56 0.84 108 up osd.5
41 ssd 0.36388 1.00000 373 GiB 211 GiB 210 GiB 207 KiB 818 MiB 161 GiB 56.69 1.05 104 up osd.41
42 ssd 0.45470 1.00000 447 GiB 220 GiB 219 GiB 214 KiB 791 MiB 227 GiB 49.26 0.91 107 up osd.42
48 ssd 0.45470 1.00000 447 GiB 284 GiB 284 GiB 281 KiB 864 MiB 163 GiB 63.62 1.17 139 up osd.48
-2 3.98335 - 3.9 TiB 2.1 TiB 2.1 TiB 1.0 GiB 5.0 GiB 1.7 TiB 54.82 1.01 - host fc-r02-ceph-osd-02
36 nvme 0.36388 1.00000 373 GiB 239 GiB 238 GiB 163 MiB 460 MiB 134 GiB 64.10 1.18 127 up osd.36
6 ssd 0.45470 1.00000 447 GiB 247 GiB 246 GiB 114 MiB 585 MiB 200 GiB 55.20 1.02 121 up osd.6
7 ssd 0.45470 1.00000 447 GiB 260 GiB 259 GiB 158 MiB 590 MiB 187 GiB 58.19 1.07 126 up osd.7
8 ssd 0.45470 1.00000 447 GiB 196 GiB 195 GiB 165 MiB 471 MiB 251 GiB 43.85 0.81 101 up osd.8
9 ssd 0.45470 1.00000 447 GiB 203 GiB 202 GiB 168 MiB 407 MiB 244 GiB 45.34 0.84 104 up osd.9
10 ssd 0.43660 1.00000 447 GiB 284 GiB 283 GiB 287 KiB 777 MiB 163 GiB 63.49 1.17 142 up osd.10
29 ssd 0.45470 1.00000 447 GiB 241 GiB 240 GiB 147 MiB 492 MiB 206 GiB 53.93 1.00 124 up osd.29
43 ssd 0.45470 1.00000 447 GiB 257 GiB 256 GiB 151 MiB 509 MiB 190 GiB 57.48 1.06 131 up osd.43
49 ssd 0.45470 1.00000 447 GiB 239 GiB 238 GiB 242 KiB 820 MiB 209 GiB 53.35 0.98 123 up osd.49
-5 4.00145 - 3.9 TiB 2.1 TiB 2.1 TiB 1.3 GiB 4.9 GiB 1.7 TiB 55.41 1.02 - host fc-r02-ceph-osd-03
40 nvme 0.36388 1.00000 373 GiB 236 GiB 235 GiB 156 MiB 469 MiB 137 GiB 63.26 1.17 119 up osd.40
11 ssd 0.45470 1.00000 447 GiB 244 GiB 244 GiB 187 MiB 602 MiB 203 GiB 54.68 1.01 123 up osd.11
12 ssd 0.45470 1.00000 447 GiB 235 GiB 235 GiB 163 MiB 496 MiB 212 GiB 52.65 0.97 122 up osd.12
13 ssd 0.45470 1.00000 447 GiB 236 GiB 235 GiB 114 MiB 594 MiB 211 GiB 52.79 0.97 124 up osd.13
14 ssd 0.45470 1.00000 447 GiB 259 GiB 258 GiB 145 MiB 475 MiB 188 GiB 57.87 1.07 126 up osd.14
15 ssd 0.45470 1.00000 447 GiB 267 GiB 266 GiB 181 MiB 662 MiB 180 GiB 59.73 1.10 137 up osd.15
16 ssd 0.45470 1.00000 447 GiB 256 GiB 255 GiB 183 MiB 478 MiB 191 GiB 57.19 1.06 134 up osd.16
44 ssd 0.45470 1.00000 447 GiB 229 GiB 228 GiB 188 MiB 579 MiB 219 GiB 51.12 0.94 117 up osd.44
50 ssd 0.45470 1.00000 447 GiB 227 GiB 226 GiB 215 KiB 688 MiB 220 GiB 50.72 0.94 113 up osd.50
-9 4.00145 - 3.9 TiB 2.0 TiB 2.0 TiB 1.2 GiB 4.8 GiB 1.8 TiB 52.17 0.96 - host fc-r02-ceph-osd-04
37 nvme 0.36388 1.00000 373 GiB 198 GiB 198 GiB 166 MiB 561 MiB 174 GiB 53.23 0.98 103 up osd.37
30 ssd 0.45470 1.00000 447 GiB 246 GiB 245 GiB 144 MiB 494 MiB 201 GiB 55.05 1.02 128 up osd.30
31 ssd 0.45470 1.00000 447 GiB 250 GiB 249 GiB 159 MiB 598 MiB 197 GiB 55.86 1.03 128 up osd.31
32 ssd 0.45470 1.00000 447 GiB 238 GiB 238 GiB 173 MiB 459 MiB 209 GiB 53.30 0.98 124 up osd.32
33 ssd 0.45470 1.00000 447 GiB 209 GiB 208 GiB 134 MiB 417 MiB 238 GiB 46.67 0.86 107 up osd.33
34 ssd 0.45470 1.00000 447 GiB 231 GiB 230 GiB 132 MiB 588 MiB 216 GiB 51.71 0.95 115 up osd.34
35 ssd 0.45470 1.00000 447 GiB 223 GiB 223 GiB 135 MiB 537 MiB 224 GiB 49.95 0.92 112 up osd.35
45 ssd 0.45470 1.00000 447 GiB 249 GiB 248 GiB 152 MiB 571 MiB 198 GiB 55.64 1.03 126 up osd.45
51 ssd 0.45470 1.00000 447 GiB 216 GiB 215 GiB 212 KiB 726 MiB 231 GiB 48.34 0.89 110 up osd.51
-11 4.00145 - 3.9 TiB 2.1 TiB 2.1 TiB 1.2 GiB 5.1 GiB 1.8 TiB 53.77 0.99 - host fc-r02-ceph-osd-05
38 nvme 0.36388 1.00000 373 GiB 193 GiB 192 GiB 104 MiB 561 MiB 180 GiB 51.77 0.96 99 up osd.38
17 ssd 0.45470 1.00000 447 GiB 216 GiB 216 GiB 184 MiB 440 MiB 231 GiB 48.40 0.89 114 up osd.17
18 ssd 0.45470 1.00000 447 GiB 255 GiB 255 GiB 178 MiB 540 MiB 192 GiB 57.12 1.05 132 up osd.18
19 ssd 0.45470 1.00000 447 GiB 236 GiB 235 GiB 143 MiB 571 MiB 211 GiB 52.78 0.97 121 up osd.19
20 ssd 0.45470 1.00000 447 GiB 205 GiB 204 GiB 136 MiB 410 MiB 242 GiB 45.79 0.85 105 up osd.20
21 ssd 0.45470 1.00000 447 GiB 233 GiB 232 GiB 149 MiB 459 MiB 214 GiB 52.02 0.96 119 up osd.21
22 ssd 0.45470 1.00000 447 GiB 283 GiB 282 GiB 174 MiB 708 MiB 164 GiB 63.33 1.17 143 up osd.22
46 ssd 0.45470 1.00000 447 GiB 230 GiB 229 GiB 197 MiB 493 MiB 217 GiB 51.41 0.95 112 up osd.46
52 ssd 0.45470 1.00000 447 GiB 273 GiB 272 GiB 309 KiB 1021 MiB 174 GiB 60.99 1.13 141 up osd.52
-25 4.00145 - 3.9 TiB 2.1 TiB 2.1 TiB 1.3 GiB 5.1 GiB 1.8 TiB 53.95 1.00 - host fc-r02-ceph-osd-06
39 nvme 0.36388 1.00000 373 GiB 211 GiB 210 GiB 126 MiB 435 MiB 162 GiB 56.56 1.04 106 up osd.39
23 ssd 0.45470 1.00000 447 GiB 221 GiB 221 GiB 192 MiB 565 MiB 226 GiB 49.51 0.91 120 up osd.23
24 ssd 0.45470 1.00000 447 GiB 270 GiB 270 GiB 204 MiB 540 MiB 177 GiB 60.51 1.12 140 up osd.24
25 ssd 0.45470 1.00000 447 GiB 224 GiB 223 GiB 119 MiB 514 MiB 223 GiB 50.07 0.92 113 up osd.25
26 ssd 0.45470 1.00000 447 GiB 249 GiB 249 GiB 182 MiB 603 MiB 198 GiB 55.77 1.03 127 up osd.26
27 ssd 0.45470 1.00000 447 GiB 229 GiB 228 GiB 206 MiB 495 MiB 218 GiB 51.17 0.94 114 up osd.27
28 ssd 0.45470 1.00000 447 GiB 231 GiB 230 GiB 147 MiB 629 MiB 216 GiB 51.68 0.95 119 up osd.28
47 ssd 0.45470 1.00000 447 GiB 237 GiB 236 GiB 170 MiB 586 MiB 210 GiB 53.00 0.98 131 up osd.47
53 ssd 0.45470 1.00000 447 GiB 258 GiB 257 GiB 263 KiB 829 MiB 189 GiB 57.69 1.07 138 up osd.53
TOTAL 23 TiB 13 TiB 12 TiB 6.0 GiB 32 GiB 11 TiB 54.17
MIN/MAX VAR: 0.81/1.18  STDDEV: 5.03
==========
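
(For reference, the hosts were moved into root=default with commands along these lines; this is only a sketch, the exact invocations may have differed:)

==========
# move each OSD host out of the old root and directly under root=default
ceph osd crush move fc-r02-ceph-osd-01 root=default
ceph osd crush move fc-r02-ceph-osd-02 root=default
# ... and the same for fc-r02-ceph-osd-03 through fc-r02-ceph-osd-06
==========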

Then, today, I noticed (via Proxmox, while adding a disk to a VM) that the "ssd-pool" shows 100% usage, which cannot be true. You can see it in this output as well:

==========================
root@fc-r02-ceph-osd-01:[~]: ceph df
--- RAW STORAGE ---
CLASS     SIZE    AVAIL     USED  RAW USED  %RAW USED
nvme   1.8 TiB  786 GiB  1.1 TiB   1.1 TiB      57.79
ssd     21 TiB  9.8 TiB   11 TiB    11 TiB      53.86
TOTAL   23 TiB   11 TiB   13 TiB    13 TiB      54.17

--- POOLS ---
POOL      ID   PGS   STORED  OBJECTS     USED   %USED  MAX AVAIL
ssd-pool   1  2048  4.2 TiB    1.14M   12 TiB  100.00        0 B
db-pool    4   128   50 MiB        3  151 MiB  100.00        0 B
.mgr       5     1   43 MiB       12  130 MiB       0    2.4 TiB
==========================

So we really did miss something: both pools still use the old crush rule, which takes from the now-empty r02-ssds bucket, so MAX AVAIL ends up as 0 B:

==========================
root@fc-r02-ceph-osd-01:[~]: ceph osd pool get ssd-pool crush_rule
crush_rule: fc-r02-ssdpool
root@fc-r02-ceph-osd-01:[~]: ceph osd pool get db-pool crush_rule
crush_rule: fc-r02-ssdpool
==========================


Below is the (pretty old) crush rule dump.

So, which steps are needed to get this right? And will it cause rebalancing (i.e. would it be better to take the datacenter offline for the change)? My rough guess at the commands is at the end of this mail, after the rule dump.

==========================
root@fc-r02-ceph-osd-01:[~]:  ceph osd crush rule dump
[
    {
        "rule_id": 0,
        "rule_name": "replicated_rule",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -1,
                "item_name": "default"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    },
    {
        "rule_id": 1,
        "rule_name": "fc-r02-ssdpool",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -15,
                "item_name": "r02-ssds"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    },
    {
        "rule_id": 2,
        "rule_name": "fc-r02-satapool",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -16,
                "item_name": "r02-sata"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    },
    {
        "rule_id": 3,
        "rule_name": "fc-r02-ssd",
        "type": 1,
        "steps": [
            {
                "op": "take",
                "item": -4,
                "item_name": "default~ssd"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "host"
            },
            {
                "op": "emit"
            }
        ]
    }
]
============================================
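
My rough guess, assuming the existing fc-r02-ssd rule (rule_id 3, which takes default~ssd) is what these pools should use, would be to repoint both pools first and only afterwards drop the old rules and the empty buckets. Please correct me if this is wrong:

==========================
# point both pools at the device-class based rule on root=default
ceph osd pool set ssd-pool crush_rule fc-r02-ssd
ceph osd pool set db-pool crush_rule fc-r02-ssd

# once nothing references them any more, remove the obsolete rules ...
ceph osd crush rule rm fc-r02-ssdpool
ceph osd crush rule rm fc-r02-satapool

# ... and the now-empty buckets, leaf-most first
ceph osd crush remove r02-ssds
ceph osd crush remove fc-ssds
ceph osd crush remove ssds
ceph osd crush remove r02-sata
ceph osd crush remove fc-sata
ceph osd crush remove sata
==========================

(I am also not sure what an ssd-class rule means for the handful of OSDs that report class "nvme" in the tree above: do they need to be reclassified, or does the rule have to cover both classes?)

For the rebalancing question, I assume the impact could be estimated beforehand with crushtool by comparing the mappings of the old rule (rule_id 1) against the new one (rule_id 3). This only maps synthetic input values, not the real PGs, so it is just an approximation:

==========================
ceph osd getcrushmap -o crush.bin
# assuming the pools use size 3
crushtool -i crush.bin --test --rule 1 --num-rep 3 --show-mappings > old-mappings.txt
crushtool -i crush.bin --test --rule 3 --num-rep 3 --show-mappings > new-mappings.txt
# then compare which OSD sets change between the two files
==========================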

cu denny
_______________________________________________
ceph-users mailing list -- ceph-users@ceph.io
To unsubscribe send an email to ceph-users-le...@ceph.io
