Author: mm
Date: Sat May 15 07:07:38 2010
New Revision: 208109
URL: http://svn.freebsd.org/changeset/base/208109

Log:
  MFC r207481, r207956:
  
  MFC r207481 [1]:
  Add sysctl and loader tunable vfs.zfs.txg.write_limit_override.
  This tunable improves fine-tuning of ZFS write throttling.
  
  MFC r207956 [2]:
  Fix possible hang when replaying large truncations.
  OpenSolaris onnv revision:    7904:6a124a4ca9c5
  
  PR:           kern/146108 [1]
  Suggested by: Nikolay Denev <ndenev at gmail.com> [1]
  Obtained from:        OpenSolaris (Bug ID 6761624) [2]
  Approved by:  pjd, delphij (mentor)

Modified:
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)
  stable/8/sys/geom/sched/   (props changed)

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c       Sat May 15 07:01:41 2010        (r208108)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c       Sat May 15 07:07:38 2010        (r208109)
@@ -38,6 +38,7 @@ static void txg_quiesce_thread(void *arg
 
 int zfs_txg_timeout = 30;      /* max seconds worth of delta per txg */
 extern int zfs_txg_synctime;
+extern uint64_t zfs_write_limit_override;
 
 SYSCTL_DECL(_vfs_zfs);
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0,
@@ -48,6 +49,11 @@ SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, timeo
 TUNABLE_INT("vfs.zfs.txg.synctime", &zfs_txg_synctime);
 SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, synctime, CTLFLAG_RDTUN, &zfs_txg_synctime,
     0, "Target seconds to sync a txg");
+TUNABLE_QUAD("vfs.zfs.txg.write_limit_override", &zfs_write_limit_override);
+SYSCTL_QUAD(_vfs_zfs_txg, OID_AUTO, write_limit_override, CTLFLAG_RW,
+    &zfs_write_limit_override, 0,
+    "Override maximum size of a txg to this size in bytes, "
+    "value of 0 means don't override");
 
 /*
  * Prepare the txg subsystem.

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c       Sat May 15 07:01:41 2010        (r208108)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c       Sat May 15 07:07:38 2010        (r208109)
@@ -1567,6 +1567,29 @@ zil_replay_log_record(zilog_t *zilog, lr
        }
 
        /*
+        * Replay of large truncates can end up needing additional txs
+        * and a different txg. If they are nested within the replay tx
+        * as below then a hang is possible. So we do the truncate here
+        * and redo the truncate later (a no-op) and update the sequence
+        * number whilst in the replay tx. Fortunately, it's safe to repeat
+        * a truncate if we crash and the truncate commits. A create over
+        * an existing file will also come in as a TX_TRUNCATE record.
+        *
+        * Note, remove of large files and renames over large files is
+        * handled by putting the deleted object on a stable list
+        * and if necessary force deleting the object outside of the replay
+        * transaction using the zr_replay_cleaner.
+        */
+       if (txtype == TX_TRUNCATE) {
+               *zr->zr_txgp = TXG_NOWAIT;
+               error = zr->zr_replay[TX_TRUNCATE](zr->zr_arg, zr->zr_lrbuf,
+                   zr->zr_byteswap);
+               if (error)
+                       goto bad;
+               zr->zr_byteswap = 0; /* only byteswap once */
+       }
+
+       /*
         * We must now do two things atomically: replay this log record,
         * and update the log header to reflect the fact that we did so.
         * We use the DMU's ability to assign into a specific txg to do this.
@@ -1636,6 +1659,7 @@ zil_replay_log_record(zilog_t *zilog, lr
                dprintf("pass %d, retrying\n", pass);
        }
 
+bad:
        ASSERT(error && error != ERESTART);
        name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
        dmu_objset_name(zr->zr_os, name);
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to