Hi,

While fio is preallocating files to perform a benchmark on an NFS filesystem
exported from a Solaris system, it kills storage server performance.
The issue is reproduced on Solaris 11.3 and on an *illumos-based distribution
(tested on SmartOS)*.

*Steps to reproduce:*
*storage server side*

[solaris - storage]# zfs create -o recordsize=8k -o compression=lz4 -o
atime=off tank/filesystem1
[solaris - storage]# zfs set share.nfs=on tank/filesystem1; zfs set
share.nfs.rw=linux-host tank/filesystem1; zfs set
share.nfs.root=linux-host tank/filesystem1;

*linux client side*

mount -t nfs -o
rw,bg,hard,nointr,rsize=1048576,wsize=1048576,tcp,actimeo=0,vers=3,timeo=600
storage-server:/tank/filesystem1 /mnt

fio --time_based --name=benchmark --size=100g --runtime=120
--filename=/mnt/file3 --ioengine=libaio --randrepeat=0 --iodepth=128
--direct=1 --invalidate=1 --verify=0 --verify_fatal=0 --numjobs=1
--rw=randrw --blocksize=8k --group_reporting


*Symptoms*
*linux client side*

[root@linux ~]# strace -f -c -p 16695
Process 16695 attached with 2 threads
^CProcess 16695 detached
Process 16702 detached
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 77.29    5.463644       14492       377       188 futex
 22.70    1.604915           2    728563           pwrite
  0.00    0.000024          24         1         1 restart_syscall
------ ----------- ----------- --------- --------- ----------------
100.00    7.068583                728941       189 total

------------------------------

*server side*

root@solaris: ~# uptime
  3:54am  up 3 day(s), 12 min(s),  1 user,  load average: 472.45, 142.71, 51.30

root@solaris:~# vmstat 1
 kthr      memory            page            disk          faults      cpu
 r b w   swap  free  re  mf pi po fr de sr lf s0 s1 s2   in   sy   cs us sy id
 3 0 0 715603388 721354160 2 8 0 0 0  0  2  0  1  1  1 2222  229 7079  0  1 99
 78 0 0 717664864 723409184 5 229 0 0 0 0 0 0  0  0  0 9657  546 279346 0 61 39
 0 0 0 717656316 723401276 0 56 0 0 0 0  0  0  0  0  0 1620 2158 2290  0  0 100
 2 0 0 717656316 723401232 0 0 0 0 0  0  0  0  0  0  0 2869  299 17472 0  5 95
 3902 0 0 717656316 723401232 0 0 0 0 0 0 0 0  0  0  0 6624  358 287172 0 64 36
 1 0 0 717656316 723396560 0 0 0 0 0  0  0  0  0  0  0 1345  384  886  0  0 100
 0 0 0 717651968 723393376 0 0 0 0 0  0  0  0  0  0  0 6016  331 218345 0 34 66
 0 0 0 717651968 723393376 0 0 0 0 0  0  0  0  0  0  0 1364  335 30143 0 13 87
 2495 0 0 717647620 723388940 0 0 0 0 0 0 0 0  0  0  0 10723 331 265518 0 66 34
 0 0 0 717639044 723384008 0 0 0 0 0  0  0  0  0  0  0 1183  360  876  0  0 100
^C


root@solaris:~# prstat -mL 1

   PID USERNAME USR SYS TRP TFL DFL LCK SLP LAT VCX ICX SCL SIG PROCESS/LWP
  3111 root     6.0 2.3 0.0 0.0 0.0 0.0  79  13   1  80 545   0 prstat/1
  2358 root     0.0 7.4 0.0 0.0 0.0 0.0  89 3.6  22   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 2.2 0.0 0.0 0.0 0.0  96 2.1   9   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.8 0.0 0.0 0.0 0.0  96 1.8  13   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.7 0.0 0.0 0.0 0.0  96 2.1  11   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.7 0.0 0.0 0.0 0.0  97 1.6  12   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.7 0.0 0.0 0.0 0.0  96 2.6  14   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.6 0.0 0.0 0.0 0.0  95 2.9  25   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.6 0.0 0.0 0.0 0.0  97 1.1  17   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.6 0.0 0.0 0.0 0.0  96 2.0  11   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.6 0.0 0.0 0.0 0.0  96 1.9  10   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.5 0.0 0.0 0.0 0.0  97 2.0  11   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.5 0.0 0.0 0.0 0.0  96 2.3  10   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.5 0.0 0.0 0.0 0.0  96 2.1  10   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.5 0.0 0.0 0.0 0.0  96 2.6  18   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  97 2.0  11   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  96 2.2  10   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  96 2.6  11   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  96 2.3  15   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  96 2.3  18   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  96 2.5  12   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  96 2.7  12   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.4 0.0 0.0 0.0 0.0  97 2.1  11   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.3 0.0 0.0 0.0 0.0  97 1.2   5   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.3 0.0 0.0 0.0 0.0  96 2.9  13   0   0   0
nfsd_kproc/taskq_d_thread
  2358 root     0.0 1.3 0.0 0.0 0.0 0.0  96 2.8  15   0   0   0
nfsd_kproc/taskq_d_thread



## There is no intensive network traffic while fio preallocates files for the test
root@solaris:~# dlstat show-aggr aggr1 1
           LINK            PORT    IPKTS   RBYTES    OPKTS   OBYTES
          aggr1              --  173.39M  208.52G  248.98M   45.67G
          aggr1           net11  173.13M  208.48G  228.18M   41.19G
          aggr1            net6  258.76K   33.77M   20.79M    4.48G
          aggr1              --    2.64K    3.74M    8.84K    3.08M
          aggr1           net11    2.64K    3.74M        1      124
          aggr1            net6        2      188    8.84K    3.08M
          aggr1              --        7      598    1.57K    1.66M
          aggr1           net11        6      474        1      124
          aggr1            net6        1      124    1.57K    1.66M
          aggr1              --    3.28K    4.73M   15.02K    3.82M
          aggr1           net11    3.28K    4.73M        1      124
          aggr1            net6        1      124   15.02K    3.82M
          aggr1              --        5      458    4.45K    1.58M
          aggr1           net11        4      334        1      124
          aggr1            net6        1      124    4.45K    1.58M
          aggr1              --    2.01K    2.91M   18.77K    3.82M
          aggr1           net11    2.01K    2.91M        1      124
          aggr1            net6        1      124   18.76K    3.82M
          aggr1              --    3.26K    3.89M   24.83K    5.07M


root@solaris:~# ps -eLf|grep nfsd_kproc|wc -l
    4004

## Here is the lockstat output

Adaptive mutex spin: 696057 events in 10.116 seconds (68806 events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
75914  11%  11% 0.00  1797365 0xffffa1c15d2cf450     zfs_zget+0x4a
64659   9%  20% 0.00     7121 0xffffa10109cb7a48     dnode_hold_impl+0x2cd
52079   7%  28% 0.00    20526 0xffffa1c144135a18     taskq_thread+0x2a1
50935   7%  35% 0.00    20766 0xffffa1c144135a18     cv_wait+0x68
44944   6%  41% 0.00   364918 0xffff81c01c4187c8     sa_bulk_update+0x33
42476   6%  48% 0.00     4063 0xffff81b678cf2a80     dmu_buf_rele+0x23
36842   5%  53% 0.00     3398 0xffffa1c144135a18     taskq_dispatch+0x2ee
34361   5%  58% 0.00    10290 0xffffa10109cb7a48
dmu_object_size_from_db+0x56
33973   5%  63% 0.00     1948 0xffff81b678cf2a80     dbuf_find+0x9e
30378   4%  67% 0.00     4001 0xffff81b678cf2a80     dbuf_read+0xc7
26200   4%  71% 0.00     5998 0xffff81b678cf2a80     dmu_buf_freeable+0x2b
25861   4%  75% 0.00     5921 dbuf_hash_table+0x10d0 dbuf_find+0x61
22975   3%  78% 0.00    11485 0xffffa10109cb7a48     dmu_tx_commit+0x58
22472   3%  81% 0.00    10162 0xffffa10109cb7a48     dmu_tx_try_assign+0xfe
20749   3%  84% 0.00     4404 0xffffa10109cb7a48     dnode_rele+0x31
15526   2%  86% 0.00     9000 0xffffa1c08bff2018     svc_cots_kdup+0x73
13575   2%  88% 0.00     2944 0xffffa10109cb7a48
dmu_object_info_from_dnode+0x3c
 9807   1%  90% 0.00     8631 0xffffa10109cb7a48     dbuf_dirty_impl+0x4fa
 8175   1%  91% 0.00      946 0xffffa1c17bcc1a48     tcp_wput+0x56
 4212   1%  91% 0.00      698 0xffff81b77ad72ec8     zfs_range_unlock+0x22
 3979   1%  92% 0.00      654 0xffffa1c17bcc1a48     tcp_output+0x65
 3925   1%  93% 0.00      635 0xffffa1c17bcc1a48     tcp_wput_data+0x838
 3578   1%  93% 0.00      718 0xffff81b77ad72e98     zfs_range_unlock+0x22
 3493   1%  94% 0.00     1891 0xffff81b77ad72e78     zfs_getattr+0x10a
 2976   0%  94% 0.00      547 0xffffa1c14b74eac8     svc_async_sendreply+0x96
.......



Adaptive mutex block: 82946 events in 10.116 seconds (8199 events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
45487  55%  55% 0.00 159857460 0xffffa1c15d2cf450     zfs_zget+0x4a
37114  45% 100% 0.00 55651428 0xffff81c01c4187c8     sa_bulk_update+0x33
   46   0% 100% 0.00 13790471 0xffffa1c192f1aac0     txg_hold_open+0x45
   46   0% 100% 0.00  3015341 0xffffa1c192f18240     txg_hold_open+0x45
   33   0% 100% 0.00 12991326 0xffffa1c192f1a340     txg_hold_open+0x45


Spin lock spin: 499633 events in 10.116 seconds (49389 events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
412488  83%  83% 0.00   277802 turnstile_table+0x8e8  disp_lock_enter+0x2d
76607  15%  98% 0.00   176133 turnstile_table+0xb58  disp_lock_enter+0x2d
 4745   1%  99% 0.00     2258 turnstile_table+0x988  turnstile_interlock+0x16d
  725   0%  99% 0.00    11095 turnstile_table+0x988  disp_lock_enter+0x2d
  716   0%  99% 0.00    58156 turnstile_loser_lock   turnstile_will_prio+0x162
  165   0%  99% 0.00     3359 turnstile_table+0xcf8  disp_lock_enter+0x2d
   84   0%  99% 0.00     1354 0xffffa1c13a5ca690     disp_lock_enter+0x2d
   79   0%  99% 0.00     6633 0xffffa1c13b5363e8     disp_lock_enter+0x2d
   76   0%  99% 0.00     1165 0xffffa1c13b536ed8     disp_lock_enter+0x2d



Thread lock spin: 5975 events in 10.116 seconds (591 events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
 5840  98%  98% 0.00     5543 turnstile_table+0xb58  turnstile_will_prio+0x173
  122   2% 100% 0.00     1486 sleepq_head+0x7548     sysdc_update+0x15b
    2   0% 100% 0.00      550 0xffffa1c13a5ca690     sysdc_update+0x15b
    1   0% 100% 0.00     2066 cpu[55]+0xe8           cv_wait+0x36
    1   0% 100% 0.00     1458 0xffffa1c13b6f4338     lwp_continue+0x35
    1   0% 100% 0.00      265 cpu[46]+0xe8           post_syscall+0x5ea

R/W writer blocked by writer: 1 events in 10.116 seconds (0 events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
    1 100% 100% 0.00  7302358 0xffffa10109cb7970     dnode_new_blkid+0x73
-------------------------------------------------------------------------------

R/W writer blocked by readers: 286 events in 10.116 seconds (28 events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
  280  98%  98% 0.00  5103151 0xffffa10109cb7970     dnode_new_blkid+0x73
    6   2% 100% 0.00     6176 0xffffa1c1588e9e18     as_map+0x3f
-------------------------------------------------------------------------------


R/W reader blocked by writer: 455 events in 10.116 seconds (45 events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
  111  24%  24% 0.00  6380761 0xffffa10109cb7970     dmu_tx_check_ioerr+0x40
   97  21%  46% 0.00  5590398 0xffffa10109cb7970     dmu_tx_count_write+0x2d9
   95  21%  67% 0.00  4772886 0xffffa10109cb7970     dbuf_read+0x6a
   71  16%  82% 0.00  5022494 0xffffa10109cb7970
dmu_buf_hold_array_by_dnode+0x6f




R/W reader blocked by write wanted: 1586 events in 10.116 seconds (157
events/sec)

Count indv cuml rcnt     nsec Lock                   Caller
-------------------------------------------------------------------------------
  470  30%  30% 0.00  8174114 0xffffa10109cb7970     dmu_tx_check_ioerr+0x40
  363  23%  53% 0.00  7275920 0xffffa10109cb7970     dbuf_read+0x6a
  238  15%  68% 0.00  8268051 0xffffa10109cb7970
dmu_buf_hold_array_by_dnode+0x6f
  202  13%  80% 0.00  7048660 0xffffa10109cb7970     dmu_tx_count_write+0x2d9
  175  11%  91% 0.00  7460750 0xffffa10109cb7970     dmu_bonus_hold+0x48
  134   8% 100% 0.00  8190144 0xffffa10109cb7970     dbuf_dirty_impl+0x5cb
    4   0% 100% 0.00  5574502 0xffffa10109cb7970
dmu_object_info_from_dnode+0x2c



-----------------
*configuration*
storage server
*hardware*: cisco ucs240m4 (cpu: 2 x Intel(R) Xeon(R) CPU E5-2697A v4 @
2.60GHz; mem: 768gb)
*os*: 0.5.11 (Oracle Solaris 11.3.21.5.0), *but it also reproduces on
SmartOS*
*pool*: version: 37, configuration: 8 x raidz2 (8d + 2p), slog: 4 x mirror
ssd, l2arc: 4 x ssd
*sas*: SAS9300-8E
network
NFS is exported through a dedicated L2 segment: a 10GbE low-latency Ethernet
network with jumbo frames enabled. Jumbo frames are enabled on both the
Solaris server and the Linux client.

β€”β€”β€”
PS: I also posted the problem at
https://www.reddit.com/r/zfs/comments/6n0age/fio_benchmark_running_throw_nfs_kill_solaris/



-------------------------------------------
smartos-discuss
Archives: https://www.listbox.com/member/archive/184463/=now
RSS Feed: https://www.listbox.com/member/archive/rss/184463/25769125-55cfbc00
Modify Your Subscription: 
https://www.listbox.com/member/?member_id=25769125&id_secret=25769125-7688e9fb
Powered by Listbox: http://www.listbox.com

Reply via email to