Author: mmacy
Date: Sun Aug 12 02:24:18 2018
New Revision: 337676
URL: https://svnweb.freebsd.org/changeset/base/337676

Log:
  MFV/ZoL:     Fix stack dbuf_hold_impl()
  
  commit fc5bb51f08a6c91ff9ad3559d0266eeeab0b1f61
  Author: Brian Behlendorf <behlendo...@llnl.gov>
  Date:   Thu Aug 26 10:52:00 2010 -0700
  
      Fix stack dbuf_hold_impl()
  
      This commit preserves the recursive function dbuf_hold_impl() but moves
      the local variables and function arguments to the heap to minimize
      the stack frame size.  Enough space is initially allocated on the
      stack for 20 levels of recursion.  This technique was based on commit
      34229a2f2ac07363f64ddd63e014964fff2f0671 which reduced stack usage of
      traverse_visitbp().
  
      Signed-off-by: Brian Behlendorf <behlendo...@llnl.gov>

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Sun Aug 12 
02:12:44 2018        (r337675)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c  Sun Aug 12 
02:24:18 2018        (r337676)
@@ -51,6 +51,30 @@
 #include <sys/cityhash.h>
 #include <sys/spa_impl.h>
 
+struct dbuf_hold_impl_data {
+       /* Function arguments */
+       dnode_t *dh_dn;
+       uint8_t dh_level;
+       uint64_t dh_blkid;
+       boolean_t dh_fail_sparse;
+       boolean_t dh_fail_uncached;
+       void *dh_tag;
+       dmu_buf_impl_t **dh_dbp;
+       /* Local variables */
+       dmu_buf_impl_t *dh_db;
+       dmu_buf_impl_t *dh_parent;
+       blkptr_t *dh_bp;
+       int dh_err;
+       dbuf_dirty_record_t *dh_dr;
+       int dh_depth;
+};
+
+static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
+    dnode_t *dn, uint8_t level, uint64_t blkid, boolean_t fail_sparse,
+       boolean_t fail_uncached,
+       void *tag, dmu_buf_impl_t **dbp, int depth);
+static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh);
+
 static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
 
@@ -2202,9 +2226,10 @@ dbuf_destroy(dmu_buf_impl_t *db)
  * this happens when the dnode is the meta-dnode, or a userused or groupused
  * object.
  */
-static int
+__attribute__((always_inline))
+static inline int
 dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
-    dmu_buf_impl_t **parentp, blkptr_t **bpp)
+    dmu_buf_impl_t **parentp, blkptr_t **bpp, struct dbuf_hold_impl_data *dh)
 {
        *parentp = NULL;
        *bpp = NULL;
@@ -2257,8 +2282,16 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, in
                return (SET_ERROR(ENOENT));
        } else if (level < nlevels-1) {
                /* this block is referenced from an indirect block */
-               int err = dbuf_hold_impl(dn, level+1,
-                   blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
+               int err;
+               if (dh == NULL) {
+                       err = dbuf_hold_impl(dn, level+1,
+                           blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
+               } else {
+                       __dbuf_hold_impl_init(dh + 1, dn, dh->dh_level + 1,
+                           blkid >> epbs, fail_sparse, FALSE, NULL,
+                           parentp, dh->dh_depth + 1);
+                       err = __dbuf_hold_impl(dh + 1);
+               }
                if (err)
                        return (err);
                err = dbuf_read(*parentp, NULL,
@@ -2626,108 +2659,189 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blk
        zio_nowait(pio);
 }
 
+#define        DBUF_HOLD_IMPL_MAX_DEPTH        20
+
 /*
+ * Helper function for __dbuf_hold_impl() to copy a buffer. Handles
+ * the case of encrypted, compressed and uncompressed buffers by
+ * allocating the new buffer, respectively, with arc_alloc_raw_buf(),
+ * arc_alloc_compressed_buf() or arc_alloc_buf().*
+ *
+ * NOTE: Declared noinline to avoid stack bloat in __dbuf_hold_impl().
+ */
+noinline static void
+dbuf_hold_copy(struct dbuf_hold_impl_data *dh)
+{
+       dnode_t *dn = dh->dh_dn;
+       dmu_buf_impl_t *db = dh->dh_db;
+       dbuf_dirty_record_t *dr = dh->dh_dr;
+       arc_buf_t *data = dr->dt.dl.dr_data;
+
+       enum zio_compress compress_type = arc_get_compression(data);
+
+       if (compress_type != ZIO_COMPRESS_OFF) {
+               dbuf_set_data(db, arc_alloc_compressed_buf(
+                   dn->dn_objset->os_spa, db, arc_buf_size(data),
+                   arc_buf_lsize(data), compress_type));
+       } else {
+               dbuf_set_data(db, arc_alloc_buf(dn->dn_objset->os_spa, db,
+                   DBUF_GET_BUFC_TYPE(db), db->db.db_size));
+       }
+
+       bcopy(data->b_data, db->db.db_data, arc_buf_size(data));
+}
+
+/*
  * Returns with db_holds incremented, and db_mtx not held.
  * Note: dn_struct_rwlock must be held.
  */
-int
-dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
-    boolean_t fail_sparse, boolean_t fail_uncached,
-    void *tag, dmu_buf_impl_t **dbp)
+static int
+__dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
 {
-       dmu_buf_impl_t *db, *parent = NULL;
+       ASSERT3S(dh->dh_depth, <, DBUF_HOLD_IMPL_MAX_DEPTH);
+       dh->dh_parent = NULL;
 
-       ASSERT(blkid != DMU_BONUS_BLKID);
-       ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
-       ASSERT3U(dn->dn_nlevels, >, level);
+       ASSERT(dh->dh_blkid != DMU_BONUS_BLKID);
+       ASSERT(RW_LOCK_HELD(&dh->dh_dn->dn_struct_rwlock));
+       ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level);
 
-       *dbp = NULL;
-top:
+       *(dh->dh_dbp) = NULL;
+
        /* dbuf_find() returns with db_mtx held */
-       db = dbuf_find(dn->dn_objset, dn->dn_object, level, blkid);
+       dh->dh_db = dbuf_find(dh->dh_dn->dn_objset, dh->dh_dn->dn_object,
+           dh->dh_level, dh->dh_blkid);
 
-       if (db == NULL) {
-               blkptr_t *bp = NULL;
-               int err;
+       if (dh->dh_db == NULL) {
+               dh->dh_bp = NULL;
 
-               if (fail_uncached)
+               if (dh->dh_fail_uncached)
                        return (SET_ERROR(ENOENT));
 
-               ASSERT3P(parent, ==, NULL);
-               err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
-               if (fail_sparse) {
-                       if (err == 0 && bp && BP_IS_HOLE(bp))
-                               err = SET_ERROR(ENOENT);
-                       if (err) {
-                               if (parent)
-                                       dbuf_rele(parent, NULL);
-                               return (err);
+               ASSERT3P(dh->dh_parent, ==, NULL);
+               dh->dh_err = dbuf_findbp(dh->dh_dn, dh->dh_level, dh->dh_blkid,
+                   dh->dh_fail_sparse, &dh->dh_parent, &dh->dh_bp, dh);
+               if (dh->dh_fail_sparse) {
+                       if (dh->dh_err == 0 &&
+                           dh->dh_bp && BP_IS_HOLE(dh->dh_bp))
+                               dh->dh_err = SET_ERROR(ENOENT);
+                       if (dh->dh_err) {
+                               if (dh->dh_parent)
+                                       dbuf_rele(dh->dh_parent, NULL);
+                               return (dh->dh_err);
                        }
                }
-               if (err && err != ENOENT)
-                       return (err);
-               db = dbuf_create(dn, level, blkid, parent, bp);
+               if (dh->dh_err && dh->dh_err != ENOENT)
+                       return (dh->dh_err);
+               dh->dh_db = dbuf_create(dh->dh_dn, dh->dh_level, dh->dh_blkid,
+                   dh->dh_parent, dh->dh_bp);
        }
 
-       if (fail_uncached && db->db_state != DB_CACHED) {
-               mutex_exit(&db->db_mtx);
+       if (dh->dh_fail_uncached && dh->dh_db->db_state != DB_CACHED) {
+               mutex_exit(&dh->dh_db->db_mtx);
                return (SET_ERROR(ENOENT));
        }
 
-       if (db->db_buf != NULL) {
-               arc_buf_access(db->db_buf);
-               ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
+       if (dh->dh_db->db_buf != NULL) {
+               arc_buf_access(dh->dh_db->db_buf);
+               ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
        }
 
-       ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf));
+       ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));
 
        /*
         * If this buffer is currently syncing out, and we are are
         * still referencing it from db_data, we need to make a copy
         * of it in case we decide we want to dirty it again in this txg.
         */
-       if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
-           dn->dn_object != DMU_META_DNODE_OBJECT &&
-           db->db_state == DB_CACHED && db->db_data_pending) {
-               dbuf_dirty_record_t *dr = db->db_data_pending;
-
-               if (dr->dt.dl.dr_data == db->db_buf) {
-                       arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
-                       dbuf_set_data(db,
-                           arc_alloc_buf(dn->dn_objset->os_spa, db, type,
-                           db->db.db_size));
-                       bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data,
-                           db->db.db_size);
-               }
+       if (dh->dh_db->db_level == 0 &&
+           dh->dh_db->db_blkid != DMU_BONUS_BLKID &&
+           dh->dh_dn->dn_object != DMU_META_DNODE_OBJECT &&
+           dh->dh_db->db_state == DB_CACHED && dh->dh_db->db_data_pending) {
+               dh->dh_dr = dh->dh_db->db_data_pending;
+               if (dh->dh_dr->dt.dl.dr_data == dh->dh_db->db_buf)
+                       dbuf_hold_copy(dh);
        }
 
-       if (multilist_link_active(&db->db_cache_link)) {
-               ASSERT(refcount_is_zero(&db->db_holds));
-               ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
-                   db->db_caching_status == DB_DBUF_METADATA_CACHE);
+       if (multilist_link_active(&dh->dh_db->db_cache_link)) {
+               ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
+               ASSERT(dh->dh_db->db_caching_status == DB_DBUF_CACHE ||
+                   dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE);
 
-               multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
+               multilist_remove(
+                   dbuf_caches[dh->dh_db->db_caching_status].cache,
+                   dh->dh_db);
                (void) refcount_remove_many(
-                   &dbuf_caches[db->db_caching_status].size,
-                   db->db.db_size, db);
+                   &dbuf_caches[dh->dh_db->db_caching_status].size,
+                   dh->dh_db->db.db_size, dh->dh_db);
 
-               db->db_caching_status = DB_NO_CACHE;
+               dh->dh_db->db_caching_status = DB_NO_CACHE;
        }
-       (void) refcount_add(&db->db_holds, tag);
-       DBUF_VERIFY(db);
-       mutex_exit(&db->db_mtx);
+       (void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
+       DBUF_VERIFY(dh->dh_db);
+       mutex_exit(&dh->dh_db->db_mtx);
 
        /* NOTE: we can't rele the parent until after we drop the db_mtx */
-       if (parent)
-               dbuf_rele(parent, NULL);
+       if (dh->dh_parent)
+               dbuf_rele(dh->dh_parent, NULL);
 
-       ASSERT3P(DB_DNODE(db), ==, dn);
-       ASSERT3U(db->db_blkid, ==, blkid);
-       ASSERT3U(db->db_level, ==, level);
-       *dbp = db;
+       ASSERT3P(DB_DNODE(dh->dh_db), ==, dh->dh_dn);
+       ASSERT3U(dh->dh_db->db_blkid, ==, dh->dh_blkid);
+       ASSERT3U(dh->dh_db->db_level, ==, dh->dh_level);
+       *(dh->dh_dbp) = dh->dh_db;
 
        return (0);
+}
+
+/*
+ * The following code preserves the recursive function dbuf_hold_impl()
+ * but moves the local variables AND function arguments to the heap to
+ * minimize the stack frame size.  Enough space is initially allocated
+ * on the stack for 20 levels of recursion.
+ */
+int
+dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
+    boolean_t fail_sparse, boolean_t fail_uncached,
+    void *tag, dmu_buf_impl_t **dbp)
+{
+       struct dbuf_hold_impl_data *dh;
+       int error;
+
+       dh = kmem_alloc(sizeof (struct dbuf_hold_impl_data) *
+           DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
+       __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse,
+           fail_uncached, tag, dbp, 0);
+
+       error = __dbuf_hold_impl(dh);
+
+       kmem_free(dh, sizeof (struct dbuf_hold_impl_data) *
+           DBUF_HOLD_IMPL_MAX_DEPTH);
+
+       return (error);
+}
+
+static void
+__dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
+    dnode_t *dn, uint8_t level, uint64_t blkid,
+    boolean_t fail_sparse, boolean_t fail_uncached,
+    void *tag, dmu_buf_impl_t **dbp, int depth)
+{
+       dh->dh_dn = dn;
+       dh->dh_level = level;
+       dh->dh_blkid = blkid;
+
+       dh->dh_fail_sparse = fail_sparse;
+       dh->dh_fail_uncached = fail_uncached;
+
+       dh->dh_tag = tag;
+       dh->dh_dbp = dbp;
+
+       dh->dh_db = NULL;
+       dh->dh_parent = NULL;
+       dh->dh_bp = NULL;
+       dh->dh_err = 0;
+       dh->dh_dr = NULL;
+
+       dh->dh_depth = depth;
 }
 
 dmu_buf_impl_t *
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to