fio is fine and the megacli settings are below (the device with WT is the SSD)
Vendor Id : TOSHIBA
Product Id : PX05SMB040Y
Capacity : 372.0 GB
Results
Jobs: 20 (f=20): [W(20)] [100.0% done] [0KB/447.1MB/0KB /s] [0/115K/0 iops] [eta 00m:00s]
Vendor Id : SEAGATE
Product Id : ST600MM0006
Capacity : 558.375 GB
Results
Jobs: 10 (f=10): [W(10)] [100.0% done] [0KB/100.5MB/0KB /s] [0/25.8K/0 iops] [eta 00m:00s]
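
(The exact fio job used for the runs above was not posted; something along these lines would produce that kind of output - the device path, block size, iodepth and runtime here are assumptions, with --numjobs=20 for the SSD run and 10 for the HDD run:)

fio --name=disk-test --filename=/dev/sdX --direct=1 --rw=randwrite --bs=4k \
    --iodepth=32 --numjobs=20 --runtime=60 --time_based --group_reporting
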
megacli -LDGetProp -cache -Lall -a0
Adapter 0-VD 0(target id: 0): Cache Policy:WriteThrough, ReadAheadNone, Direct, Write Cache OK if bad BBU
Adapter 0-VD 1(target id: 1): Cache Policy:WriteBack, ReadAdaptive, Cached, No Write Cache if bad BBU
Adapter 0-VD 2(target id: 2): Cache Policy:WriteBack, ReadAdaptive, Cached, No Write Cache if bad BBU
Adapter 0-VD 3(target id: 3): Cache Policy:WriteBack, ReadAdaptive, Cached, No Write Cache if bad BBU
Exit Code: 0x00
[root@osd01 ~]# megacli -LDGetProp -dskcache -Lall -a0
Adapter 0-VD 0(target id: 0): Disk Write Cache : Disk's Default
Adapter 0-VD 1(target id: 1): Disk Write Cache : Disk's Default
Adapter 0-VD 2(target id: 2): Disk Write Cache : Disk's Default
Adapter 0-VD 3(target id: 3): Disk Write Cache : Disk's Default
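
(For reference only: if one wanted to rule out the cache policy difference by giving the SSD virtual drive the same policy as the HDD VDs, the MegaCli commands would look roughly like this - the VD/adapter numbers are taken from the output above and option spelling can vary between MegaCli versions, so please double-check before running:)

megacli -LDSetProp WB -L0 -a0             # write-back instead of write-through
megacli -LDSetProp ADRA -L0 -a0           # adaptive read-ahead
megacli -LDSetProp -Cached -L0 -a0        # cached I/O instead of direct
megacli -LDSetProp -EnDskCache -Lall -a0  # enable the drives' own write cache
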
On Thu, 19 Apr 2018 at 14:22, Hans van den Bogert <[email protected]> wrote:
> I see, the second one is the read bench. Even in the 2 node scenario the
> read performance is pretty bad. Have you verified the hardware with micro
> benchmarks such as 'fio'? Also try to review storage controller settings.
>
> On Apr 19, 2018 5:13 PM, "Steven Vacaroaia" <[email protected]> wrote:
>
> replication size is always 2
>
> DB/WAL on HDD in this case
>
> I tried OSDs with WAL/DB on SSD - they exhibit the same symptoms (cur MB/s 0)
>
> In summary, it does not matter:
> - which servers (any 2 will work better than any 3 or 4)
> - replication size (I tried with size 2 and 3)
> - location of WAL/DB (on a separate SSD or on the same HDD)
>
>
> Thanks
> Steven
>
> On Thu, 19 Apr 2018 at 12:06, Hans van den Bogert <[email protected]> wrote:
>
>> I take it that the first bench is with replication size 2, the second
>> bench is with replication size 3? Same for the 4 node OSD scenario?
>>
>> Also please let us know how you set up block.db and WAL - are they on the SSD?
>>
>> On Thu, Apr 19, 2018, 14:40 Steven Vacaroaia <[email protected]> wrote:
>>
>>> Sure, thanks for your willingness to help
>>>
>>> Identical servers
>>>
>>> Hardware
>>> Dell R620, 6 cores, 64 GB RAM, 2 x 10 GbE ports,
>>> enterprise HDD 600 GB (Seagate ST600MM0006), enterprise-grade SSD 400 GB (Toshiba PX05SMB040Y)
>>>
>>>
>>> All tests were done with the following command:
>>> rados bench -p rbd 50 write --no-cleanup && rados bench -p rbd 50 seq
>>>
>>>
>>> ceph osd pool ls detail
>>> "pool_name": "rbd",
>>> "flags": 1,
>>> "flags_names": "hashpspool",
>>> "type": 1,
>>> "size": 2,
>>> "min_size": 1,
>>> "crush_rule": 1,
>>> "object_hash": 2,
>>> "pg_num": 64,
>>> "pg_placement_num": 64,
>>> "crash_replay_interval": 0,
>>> "last_change": "354",
>>> "last_force_op_resend": "0",
>>> "last_force_op_resend_preluminous": "0",
>>> "auid": 0,
>>> "snap_mode": "selfmanaged",
>>> "snap_seq": 0,
>>> "snap_epoch": 0,
>>> "pool_snaps": [],
>>> "removed_snaps": "[]",
>>> "quota_max_bytes": 0,
>>> "quota_max_objects": 0,
>>> "tiers": [],
>>> "tier_of": -1,
>>> "read_tier": -1,
>>> "write_tier": -1,
>>> "cache_mode": "none",
>>> "target_max_bytes": 0,
>>> "target_max_objects": 0,
>>> "cache_target_dirty_ratio_micro": 400000,
>>> "cache_target_dirty_high_ratio_micro": 600000,
>>> "cache_target_full_ratio_micro": 800000,
>>> "cache_min_flush_age": 0,
>>> "cache_min_evict_age": 0,
>>> "erasure_code_profile": "",
>>> "hit_set_params": {
>>> "type": "none"
>>> },
>>> "hit_set_period": 0,
>>> "hit_set_count": 0,
>>> "use_gmt_hitset": true,
>>> "min_read_recency_for_promote": 0,
>>> "min_write_recency_for_promote": 0,
>>> "hit_set_grade_decay_rate": 0,
>>> "hit_set_search_last_n": 0,
>>> "grade_table": [],
>>> "stripe_width": 0,
>>> "expected_num_objects": 0,
>>> "fast_read": false,
>>> "options": {},
>>> "application_metadata": {}
>>> }
>>>
>>>
>>> ceph osd crush rule dump
>>> [
>>> {
>>> "rule_id": 0,
>>> "rule_name": "replicated_rule",
>>> "ruleset": 0,
>>> "type": 1,
>>> "min_size": 1,
>>> "max_size": 10,
>>> "steps": [
>>> {
>>> "op": "take",
>>> "item": -1,
>>> "item_name": "default"
>>> },
>>> {
>>> "op": "chooseleaf_firstn",
>>> "num": 0,
>>> "type": "host"
>>> },
>>> {
>>> "op": "emit"
>>> }
>>> ]
>>> },
>>> {
>>> "rule_id": 1,
>>> "rule_name": "rbd",
>>> "ruleset": 1,
>>> "type": 1,
>>> "min_size": 1,
>>> "max_size": 10,
>>> "steps": [
>>> {
>>> "op": "take",
>>> "item": -9,
>>> "item_name": "sas"
>>> },
>>> {
>>> "op": "chooseleaf_firstn",
>>> "num": 0,
>>> "type": "host"
>>> },
>>> {
>>> "op": "emit"
>>> }
>>> ]
>>> }
>>> ]
>>>
>>>
>>>
>>> 2 servers, 2 OSDs
>>>
>>> ceph osd tree
>>> ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
>>> -9 4.00000 root sas
>>> -10 1.00000 host osd01-sas
>>> 2 hdd 1.00000 osd.2 up 0 1.00000
>>> -11 1.00000 host osd02-sas
>>> 3 hdd 1.00000 osd.3 up 0 1.00000
>>> -12 1.00000 host osd03-sas
>>> 5 hdd 1.00000 osd.5 up 1.00000 1.00000
>>> -19 1.00000 host osd04-sas
>>> 6 hdd 1.00000 osd.6 up 1.00000 1.00000
>>>
>>>
>>> 2018-04-19 09:19:01.266010 min lat: 0.0412473 max lat: 1.03227 avg lat: 0.331163
>>>   sec  Cur ops  started  finished  avg MB/s  cur MB/s  last lat(s)  avg lat(s)
>>>    40       16     1941      1925   192.478       192     0.315461    0.331163
>>>    41       16     1984      1968   191.978       172     0.262268    0.331529
>>>    42       16     2032      2016   191.978       192     0.326608    0.332061
>>>    43       16     2081      2065   192.071       196     0.345757    0.332389
>>>    44       16     2123      2107   191.524       168     0.307759    0.332745
>>>    45       16     2166      2150    191.09       172     0.318577    0.333613
>>>    46       16     2214      2198   191.109       192     0.329559    0.333703
>>>    47       16     2257      2241   190.702       172     0.423664     0.33427
>>>    48       16     2305      2289   190.729       192     0.357342    0.334386
>>>    49       16     2348      2332   190.346       172      0.30218    0.334735
>>>    50       16     2396      2380   190.379       192     0.318226    0.334981
>>> Total time run: 50.281886
>>> Total writes made: 2397
>>> Write size: 4194304
>>> Object size: 4194304
>>> Bandwidth (MB/sec): 190.685
>>> Stddev Bandwidth: 24.5781
>>> Max bandwidth (MB/sec): 340
>>> Min bandwidth (MB/sec): 164
>>> Average IOPS: 47
>>> Stddev IOPS: 6
>>> Max IOPS: 85
>>> Min IOPS: 41
>>> Average Latency(s): 0.335515
>>> Stddev Latency(s): 0.0867836
>>> Max latency(s): 1.03227
>>> Min latency(s): 0.0412473
>>>
>>> 2018-04-19 09:19:52.340092 min lat: 0.0209445 max lat: 14.9208 avg lat: 1.31352
>>>   sec  Cur ops  started  finished  avg MB/s  cur MB/s  last lat(s)  avg lat(s)
>>>    40       16      296       280   27.9973         0            -     1.31352
>>>    41       16      296       280   27.3144         0            -     1.31352
>>>    42       16      296       280    26.664         0            -     1.31352
>>>    43       16      323       307   28.5553         9    0.0429661     2.20267
>>>    44       16      323       307   27.9063         0            -     2.20267
>>>    45       16      363       347   30.8414        80    0.0922424     2.05975
>>>    46       16      370       354   30.7795        28    0.0302223     2.02055
>>>    47       16      370       354   30.1246         0            -     2.02055
>>>    48       16      386       370   30.8303        32      2.72624     2.06407
>>>    49       16      386       370   30.2011         0            -     2.06407
>>>    50       16      400       384   30.7169        28      2.10543     2.07055
>>>    51       16      401       385   30.1931         4      2.53183     2.07175
>>>    52       16      401       385   29.6124         0            -     2.07175
>>>    53       16      401       385   29.0537         0            -     2.07175
>>>    54       16      401       385   28.5157         0            -     2.07175
>>>    55       16      401       385   27.9972         0            -     2.07175
>>>    56       16      401       385   27.4972         0            -     2.07175
>>> Total time run: 56.042520
>>> Total reads made: 401
>>> Read size: 4194304
>>> Object size: 4194304
>>> Bandwidth (MB/sec): 28.6211
>>> Average IOPS: 7
>>> Stddev IOPS: 11
>>> Max IOPS: 47
>>> Min IOPS: 0
>>> Average Latency(s): 2.23525
>>> Max latency(s): 29.5553
>>> Min latency(s): 0.0209445
>>>
>>>
>>>
>>>
>>>
>>> 4 servers, 4 OSDs
>>>
>>> ceph osd tree
>>> ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
>>> -9 4.00000 root sas
>>> -10 1.00000 host osd01-sas
>>> 2 hdd 1.00000 osd.2 up 1.00000 1.00000
>>> -11 1.00000 host osd02-sas
>>> 3 hdd 1.00000 osd.3 up 1.00000 1.00000
>>> -12 1.00000 host osd03-sas
>>> 5 hdd 1.00000 osd.5 up 1.00000 1.00000
>>> -19 1.00000 host osd04-sas
>>> 6 hdd 1.00000 osd.6 up 1.00000 1.00000
>>>
>>>
>>>
>>>
>>> 2018-04-19 09:35:43.558843 min lat: 0.0141657 max lat: 11.3013 avg lat: 1.25618
>>>   sec  Cur ops  started  finished  avg MB/s  cur MB/s  last lat(s)  avg lat(s)
>>>    40       16      482       466   46.5956         0            -     1.25618
>>>    41       16      488       472   46.0444        12    0.0175485     1.25181
>>>    42       16      488       472   44.9481         0            -     1.25181
>>>    43       16      488       472   43.9028         0            -     1.25181
>>>    44       16      562       546   49.6316   98.6667    0.0150341     1.26385
>>>    45       16      569       553   49.1508        28    0.0151556     1.25516
>>>    46       16      569       553   48.0823         0            -     1.25516
>>>    47       16      569       553   47.0593         0            -     1.25516
>>>    48       16      569       553   46.0789         0            -     1.25516
>>>    49       16      569       553   45.1386         0            -     1.25516
>>>    50       16      569       553   44.2358         0            -     1.25516
>>>    51       16      569       553   43.3684         0            -     1.25516
>>> Total time run: 51.724920
>>> Total writes made: 570
>>> Write size: 4194304
>>> Object size: 4194304
>>> Bandwidth (MB/sec): 44.0793
>>> Stddev Bandwidth: 55.3843
>>> Max bandwidth (MB/sec): 232
>>> Min bandwidth (MB/sec): 0
>>> Average IOPS: 11
>>> Stddev IOPS: 13
>>> Max IOPS: 58
>>> Min IOPS: 0
>>> Average Latency(s): 1.45175
>>> Stddev Latency(s): 2.9411
>>> Max latency(s): 11.3013
>>> Min latency(s): 0.0141657
>>>
>>>
>>>
>>> 2018-04-19 09:36:35.633624 min lat: 0.00804825 max lat: 10.2583 avg lat: 1.03388
>>>   sec  Cur ops  started  finished  avg MB/s  cur MB/s  last lat(s)  avg lat(s)
>>>    40       16      479       463   46.2955         0            -     1.03388
>>>    41       16      540       524   51.1169      24.4   0.00913275     1.23193
>>>    42       16      540       524   49.8999         0            -     1.23193
>>>    43       16      541       525   48.8324         2      2.31401     1.23399
>>>    44       16      541       525   47.7226         0            -     1.23399
>>>    45       16      541       525   46.6621         0            -     1.23399
>>>    46       16      541       525   45.6477         0            -     1.23399
>>>    47       16      541       525   44.6765         0            -     1.23399
>>>    48       16      541       525   43.7458         0            -     1.23399
>>>    49       16      541       525    42.853         0            -     1.23399
>>>    50       16      541       525    41.996         0            -     1.23399
>>>    51       16      541       525   41.1725         0            -     1.23399
>>> Total time run: 51.530655
>>> Total reads made: 542
>>> Read size: 4194304
>>> Object size: 4194304
>>> Bandwidth (MB/sec): 42.072
>>> Average IOPS: 10
>>> Stddev IOPS: 15
>>> Max IOPS: 62
>>> Min IOPS: 0
>>> Average Latency(s): 1.5204
>>> Max latency(s): 11.4841
>>> Min latency(s): 0.00627081
>>>
>>>
>>>
>>> Many thanks
>>> Steven
>>>
>>>
>>>
>>>
>>> On Thu, 19 Apr 2018 at 08:42, Hans van den Bogert <[email protected]> wrote:
>>>
>>>> Hi Steven,
>>>>
>>>> There is only one bench. Could you show multiple benches of the
>>>> different scenarios you discussed? Also provide hardware details.
>>>>
>>>> Hans
>>>>
>>>> On Apr 19, 2018 13:11, "Steven Vacaroaia" <[email protected]> wrote:
>>>>
>>>> Hi,
>>>>
>>>> Any idea why 2 servers with one OSD each provide better performance than 3?
>>>>
>>>> Servers are identical
>>>> Performance is impacted regardless of whether I use an SSD for WAL/DB or not
>>>> Basically, I am getting lots of "cur MB/s" readings of zero
>>>>
>>>> The network is separate 10 GbE for public and private (cluster) traffic
>>>> I tested it with iperf and I am getting 9.3 Gb/s
>>>>
>>>> I have tried replication size 2 and 3 with the same results (much better for 2 servers than for 3)
>>>>
>>>> I reinstalled Ceph multiple times
>>>> ceph.conf is very simple - no major customization (see below)
>>>> I am out of ideas - any hint will be TRULY appreciated
>>>>
>>>> Steven
>>>>
>>>>
>>>>
>>>> auth_cluster_required = cephx
>>>> auth_service_required = cephx
>>>> auth_client_required = cephx
>>>>
>>>>
>>>> public_network = 10.10.30.0/24
>>>> cluster_network = 192.168.0.0/24
>>>>
>>>>
>>>> osd_pool_default_size = 2
>>>> osd_pool_default_min_size = 1 # Allow writing 1 copy in a degraded state
>>>> osd_crush_chooseleaf_type = 1
>>>>
>>>>
>>>> [mon]
>>>> mon_allow_pool_delete = true
>>>> mon_osd_min_down_reporters = 1
>>>>
>>>> [osd]
>>>> osd_mkfs_type = xfs
>>>> osd_mount_options_xfs = "rw,noatime,nodiratime,attr2,logbufs=8,logbsize=256k,largeio,inode64,swalloc,allocsize=4M"
>>>> osd_mkfs_options_xfs = "-f -i size=2048"
>>>> bluestore_block_db_size = 32212254720
>>>> bluestore_block_wal_size = 1073741824
>>>>
>>>> rados bench -p rbd 120 write --no-cleanup && rados bench -p rbd 120 seq
>>>> hints = 1
>>>> Maintaining 16 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 120 seconds or 0 objects
>>>> Object prefix: benchmark_data_osd01_383626
>>>>   sec  Cur ops  started  finished  avg MB/s  cur MB/s  last lat(s)  avg lat(s)
>>>>     0        0        0         0         0         0            -           0
>>>>     1       16       57        41   163.991       164     0.197929    0.065543
>>>>     2       16       57        41    81.992         0            -    0.065543
>>>>     3       16       67        51   67.9936        20    0.0164632    0.249939
>>>>     4       16       67        51   50.9951         0            -    0.249939
>>>>     5       16       71        55   43.9958         8    0.0171439    0.319973
>>>>     6       16      181       165   109.989       440    0.0159057    0.563746
>>>>     7       16      182       166   94.8476         4     0.221421    0.561684
>>>>     8       16      182       166   82.9917         0            -    0.561684
>>>>     9       16      240       224   99.5458       116    0.0232989    0.638292
>>>>    10       16      264       248   99.1901        96    0.0222669    0.583336
>>>>    11       16      264       248   90.1729         0            -    0.583336
>>>>    12       16      285       269   89.6579        42    0.0165706    0.600606
>>>>    13       16      285       269   82.7611         0            -    0.600606
>>>>    14       16      310       294   83.9918        50    0.0254241    0.756351
>>>>
>>>>
>>>>
>>>
>>>
>>>
>>>
>>>>
>>>>
>>>>
>
_______________________________________________
ceph-users mailing list
[email protected]
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com