Hi,
While fio is preallocating files to perform a benchmark on an NFS filesystem
exported from a Solaris system, it kills the storage server's performance.
Issue reproduced on Solaris 11.3 and *illumos base distribution (tested on
SmartOS)*
*Steps to reproduce:*
*storage server side*
[solaris - storage]# zfs create -o recordsize=8k -o compression=lz4 -o
atime=off tank/filesystem1
[solaris - storage]# zfs set share.nfs=on tank/filesystem1; zfs set
share.nfs.rw=linux-host tank/filesystem1; zfs set
share.nfs.root=linux-host tank/filesystem1;
*linux client side*
mount -t nfs -o
rw,bg,hard,nointr,rsize=1048576,wsize=1048576,tcp,actimeo=0,vers=3,timeo=600
storage-server:/tank/filesystem1 /mnt
fio --time_based --name=benchmark --size=100g --runtime=120
--filename=/mnt/file3 --ioengine=libaio --randrepeat=0 --iodepth=128
--direct=1 --invalidate=1 --verify=0 --verify_fatal=0 --numjobs=1
--rw=randrw --blocksize=8k --group_reporting
symptoms
*linux client side*
[root@linux ~]# strace -f -c -p 16695
Process 16695 attached with 2 threads
^CProcess 16695 detached
Process 16702 detached
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
77.29 5.463644 14492 377 188 futex
22.70 1.604915 2 728563 pwrite
0.00 0.000024 24 1 1 restart_syscall
------ ----------- ----------- --------- --------- ----------------
100.00 7.068583 728941 189 total
------------------------------
*server side*
root@solaris: ~# uptime
3:54am up 3 day(s), 12 min(s), 1 user, load average: 472.45, 142.71, 51.30
root@solaris:~# vmstat 1
kthr memory page disk faults cpu
r b w swap free re mf pi po fr de sr lf s0 s1 s2 in sy cs us sy id
3 0 0 715603388 721354160 2 8 0 0 0 0 2 0 1 1 1 2222 229 7079 0 1 99
78 0 0 717664864 723409184 5 229 0 0 0 0 0 0 0 0 0 9657 546 279346 0 61 39
0 0 0 717656316 723401276 0 56 0 0 0 0 0 0 0 0 0 1620 2158 2290 0 0 100
2 0 0 717656316 723401232 0 0 0 0 0 0 0 0 0 0 0 2869 299 17472 0 5 95
3902 0 0 717656316 723401232 0 0 0 0 0 0 0 0 0 0 0 6624 358 287172 0 64 36
1 0 0 717656316 723396560 0 0 0 0 0 0 0 0 0 0 0 1345 384 886 0 0 100
0 0 0 717651968 723393376 0 0 0 0 0 0 0 0 0 0 0 6016 331 218345 0 34 66
0 0 0 717651968 723393376 0 0 0 0 0 0 0 0 0 0 0 1364 335 30143 0 13 87
2495 0 0 717647620 723388940 0 0 0 0 0 0 0 0 0 0 0 10723 331 265518 0 66 34
0 0 0 717639044 723384008 0 0 0 0 0 0 0 0 0 0 0 1183 360 876 0 0 100
^C
root@solaris:~# prstat -mL 1
PID USERNAME USR SYS TRP TFL DFL LCK SLP LAT VCX ICX SCL SIG PROCESS/LWP
3111 root 6.0 2.3 0.0 0.0 0.0 0.0 79 13 1 80 545 0 prstat/1
2358 root 0.0 7.4 0.0 0.0 0.0 0.0 89 3.6 22 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 2.2 0.0 0.0 0.0 0.0 96 2.1 9 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.8 0.0 0.0 0.0 0.0 96 1.8 13 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.7 0.0 0.0 0.0 0.0 96 2.1 11 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.7 0.0 0.0 0.0 0.0 97 1.6 12 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.7 0.0 0.0 0.0 0.0 96 2.6 14 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.6 0.0 0.0 0.0 0.0 95 2.9 25 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.6 0.0 0.0 0.0 0.0 97 1.1 17 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.6 0.0 0.0 0.0 0.0 96 2.0 11 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.6 0.0 0.0 0.0 0.0 96 1.9 10 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.5 0.0 0.0 0.0 0.0 97 2.0 11 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.5 0.0 0.0 0.0 0.0 96 2.3 10 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.5 0.0 0.0 0.0 0.0 96 2.1 10 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.5 0.0 0.0 0.0 0.0 96 2.6 18 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 97 2.0 11 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 96 2.2 10 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 96 2.6 11 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 96 2.3 15 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 96 2.3 18 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 96 2.5 12 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 96 2.7 12 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.4 0.0 0.0 0.0 0.0 97 2.1 11 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.3 0.0 0.0 0.0 0.0 97 1.2 5 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.3 0.0 0.0 0.0 0.0 96 2.9 13 0 0 0
nfsd_kproc/taskq_d_thread
2358 root 0.0 1.3 0.0 0.0 0.0 0.0 96 2.8 15 0 0 0
nfsd_kproc/taskq_d_thread
## There is no intensive network traffic while fio preallocates files for the test
root@solaris:~# dlstat show-aggr aggr1 1
LINK PORT IPKTS RBYTES OPKTS OBYTES
aggr1 -- 173.39M 208.52G 248.98M 45.67G
aggr1 net11 173.13M 208.48G 228.18M 41.19G
aggr1 net6 258.76K 33.77M 20.79M 4.48G
aggr1 -- 2.64K 3.74M 8.84K 3.08M
aggr1 net11 2.64K 3.74M 1 124
aggr1 net6 2 188 8.84K 3.08M
aggr1 -- 7 598 1.57K 1.66M
aggr1 net11 6 474 1 124
aggr1 net6 1 124 1.57K 1.66M
aggr1 -- 3.28K 4.73M 15.02K 3.82M
aggr1 net11 3.28K 4.73M 1 124
aggr1 net6 1 124 15.02K 3.82M
aggr1 -- 5 458 4.45K 1.58M
aggr1 net11 4 334 1 124
aggr1 net6 1 124 4.45K 1.58M
aggr1 -- 2.01K 2.91M 18.77K 3.82M
aggr1 net11 2.01K 2.91M 1 124
aggr1 net6 1 124 18.76K 3.82M
aggr1 -- 3.26K 3.89M 24.83K 5.07M
root@solaris:~# ps -eLf|grep nfsd_kproc|wc -l
4004
## here is lockstat output
Adaptive mutex spin: 696057 events in 10.116 seconds (68806 events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
75914 11% 11% 0.00 1797365 0xffffa1c15d2cf450 zfs_zget+0x4a
64659 9% 20% 0.00 7121 0xffffa10109cb7a48 dnode_hold_impl+0x2cd
52079 7% 28% 0.00 20526 0xffffa1c144135a18 taskq_thread+0x2a1
50935 7% 35% 0.00 20766 0xffffa1c144135a18 cv_wait+0x68
44944 6% 41% 0.00 364918 0xffff81c01c4187c8 sa_bulk_update+0x33
42476 6% 48% 0.00 4063 0xffff81b678cf2a80 dmu_buf_rele+0x23
36842 5% 53% 0.00 3398 0xffffa1c144135a18 taskq_dispatch+0x2ee
34361 5% 58% 0.00 10290 0xffffa10109cb7a48
dmu_object_size_from_db+0x56
33973 5% 63% 0.00 1948 0xffff81b678cf2a80 dbuf_find+0x9e
30378 4% 67% 0.00 4001 0xffff81b678cf2a80 dbuf_read+0xc7
26200 4% 71% 0.00 5998 0xffff81b678cf2a80 dmu_buf_freeable+0x2b
25861 4% 75% 0.00 5921 dbuf_hash_table+0x10d0 dbuf_find+0x61
22975 3% 78% 0.00 11485 0xffffa10109cb7a48 dmu_tx_commit+0x58
22472 3% 81% 0.00 10162 0xffffa10109cb7a48 dmu_tx_try_assign+0xfe
20749 3% 84% 0.00 4404 0xffffa10109cb7a48 dnode_rele+0x31
15526 2% 86% 0.00 9000 0xffffa1c08bff2018 svc_cots_kdup+0x73
13575 2% 88% 0.00 2944 0xffffa10109cb7a48
dmu_object_info_from_dnode+0x3c
9807 1% 90% 0.00 8631 0xffffa10109cb7a48 dbuf_dirty_impl+0x4fa
8175 1% 91% 0.00 946 0xffffa1c17bcc1a48 tcp_wput+0x56
4212 1% 91% 0.00 698 0xffff81b77ad72ec8 zfs_range_unlock+0x22
3979 1% 92% 0.00 654 0xffffa1c17bcc1a48 tcp_output+0x65
3925 1% 93% 0.00 635 0xffffa1c17bcc1a48 tcp_wput_data+0x838
3578 1% 93% 0.00 718 0xffff81b77ad72e98 zfs_range_unlock+0x22
3493 1% 94% 0.00 1891 0xffff81b77ad72e78 zfs_getattr+0x10a
2976 0% 94% 0.00 547 0xffffa1c14b74eac8 svc_async_sendreply+0x96
.......
Adaptive mutex block: 82946 events in 10.116 seconds (8199 events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
45487 55% 55% 0.00 159857460 0xffffa1c15d2cf450 zfs_zget+0x4a
37114 45% 100% 0.00 55651428 0xffff81c01c4187c8 sa_bulk_update+0x33
46 0% 100% 0.00 13790471 0xffffa1c192f1aac0 txg_hold_open+0x45
46 0% 100% 0.00 3015341 0xffffa1c192f18240 txg_hold_open+0x45
33 0% 100% 0.00 12991326 0xffffa1c192f1a340 txg_hold_open+0x45
Spin lock spin: 499633 events in 10.116 seconds (49389 events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
412488 83% 83% 0.00 277802 turnstile_table+0x8e8 disp_lock_enter+0x2d
76607 15% 98% 0.00 176133 turnstile_table+0xb58 disp_lock_enter+0x2d
4745 1% 99% 0.00 2258 turnstile_table+0x988 turnstile_interlock+0x16d
725 0% 99% 0.00 11095 turnstile_table+0x988 disp_lock_enter+0x2d
716 0% 99% 0.00 58156 turnstile_loser_lock turnstile_will_prio+0x162
165 0% 99% 0.00 3359 turnstile_table+0xcf8 disp_lock_enter+0x2d
84 0% 99% 0.00 1354 0xffffa1c13a5ca690 disp_lock_enter+0x2d
79 0% 99% 0.00 6633 0xffffa1c13b5363e8 disp_lock_enter+0x2d
76 0% 99% 0.00 1165 0xffffa1c13b536ed8 disp_lock_enter+0x2d
Thread lock spin: 5975 events in 10.116 seconds (591 events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
5840 98% 98% 0.00 5543 turnstile_table+0xb58 turnstile_will_prio+0x173
122 2% 100% 0.00 1486 sleepq_head+0x7548 sysdc_update+0x15b
2 0% 100% 0.00 550 0xffffa1c13a5ca690 sysdc_update+0x15b
1 0% 100% 0.00 2066 cpu[55]+0xe8 cv_wait+0x36
1 0% 100% 0.00 1458 0xffffa1c13b6f4338 lwp_continue+0x35
1 0% 100% 0.00 265 cpu[46]+0xe8 post_syscall+0x5ea
R/W writer blocked by writer: 1 events in 10.116 seconds (0 events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
1 100% 100% 0.00 7302358 0xffffa10109cb7970 dnode_new_blkid+0x73
-------------------------------------------------------------------------------
R/W writer blocked by readers: 286 events in 10.116 seconds (28 events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
280 98% 98% 0.00 5103151 0xffffa10109cb7970 dnode_new_blkid+0x73
6 2% 100% 0.00 6176 0xffffa1c1588e9e18 as_map+0x3f
-------------------------------------------------------------------------------
R/W reader blocked by writer: 455 events in 10.116 seconds (45 events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
111 24% 24% 0.00 6380761 0xffffa10109cb7970 dmu_tx_check_ioerr+0x40
97 21% 46% 0.00 5590398 0xffffa10109cb7970 dmu_tx_count_write+0x2d9
95 21% 67% 0.00 4772886 0xffffa10109cb7970 dbuf_read+0x6a
71 16% 82% 0.00 5022494 0xffffa10109cb7970
dmu_buf_hold_array_by_dnode+0x6f
R/W reader blocked by write wanted: 1586 events in 10.116 seconds (157
events/sec)
Count indv cuml rcnt nsec Lock Caller
-------------------------------------------------------------------------------
470 30% 30% 0.00 8174114 0xffffa10109cb7970 dmu_tx_check_ioerr+0x40
363 23% 53% 0.00 7275920 0xffffa10109cb7970 dbuf_read+0x6a
238 15% 68% 0.00 8268051 0xffffa10109cb7970
dmu_buf_hold_array_by_dnode+0x6f
202 13% 80% 0.00 7048660 0xffffa10109cb7970 dmu_tx_count_write+0x2d9
175 11% 91% 0.00 7460750 0xffffa10109cb7970 dmu_bonus_hold+0x48
134 8% 100% 0.00 8190144 0xffffa10109cb7970 dbuf_dirty_impl+0x5cb
4 0% 100% 0.00 5574502 0xffffa10109cb7970
dmu_object_info_from_dnode+0x2c
-----------------
*configuration*
storage server
*hardware*: cisco ucs240m4 (cpu: 2 x Intel(R) Xeon(R) CPU E5-2697A v4 @
2.60GHz; mem: 768gb)
*os*: 0.5.11 (Oracle Solaris 11.3.21.5.0), *but it is also reproducible on
SmartOS*
*pool*: version: 37, configuration: 8 x raidz2 (8d + 2p), slog: 4 x mirror
ssd, l2arc: 4 x ssd
*sas*: SAS9300-8E
network
nfs exported through dedicated L2 segment, 10gbe low latency ethernet
network with jumbo frame enabled. jumbo frames enabled on solaris server
and linux client.
---
PS: I also posted this problem at
https://www.reddit.com/r/zfs/comments/6n0age/fio_benchmark_running_throw_nfs_kill_solaris/
-------------------------------------------
smartos-discuss
Archives: https://www.listbox.com/member/archive/184463/=now
RSS Feed: https://www.listbox.com/member/archive/rss/184463/25769125-55cfbc00
Modify Your Subscription:
https://www.listbox.com/member/?member_id=25769125&id_secret=25769125-7688e9fb
Powered by Listbox: http://www.listbox.com