diff -Nru zfs-linux-2.1.9/debian/changelog zfs-linux-2.1.9/debian/changelog --- zfs-linux-2.1.9/debian/changelog 2023-02-26 12:32:52.000000000 +0800 +++ zfs-linux-2.1.9/debian/changelog 2023-03-25 23:07:02.000000000 +0800 @@ -1,3 +1,9 @@ +zfs-linux (2.1.9-3) unstable; urgency=medium + + * targeted cherry-pick to fix quality issues + + -- Aron Xu Sat, 25 Mar 2023 23:07:02 +0800 + zfs-linux (2.1.9-2) unstable; urgency=medium [ Aron Xu ] diff -Nru zfs-linux-2.1.9/debian/patches/0002-Increase-default-zfs_scan_vdev_limit-to-16MB.patch zfs-linux-2.1.9/debian/patches/0002-Increase-default-zfs_scan_vdev_limit-to-16MB.patch --- zfs-linux-2.1.9/debian/patches/0002-Increase-default-zfs_scan_vdev_limit-to-16MB.patch 2023-02-25 13:00:05.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0002-Increase-default-zfs_scan_vdev_limit-to-16MB.patch 1970-01-01 08:00:00.000000000 +0800 @@ -1,117 +0,0 @@ -From c0aea7cf4e86fc02db8046fbb3bca21a918053a2 Mon Sep 17 00:00:00 2001 -From: Brian Behlendorf -Date: Tue, 24 Jan 2023 14:05:45 -0800 -Subject: [PATCH] Increase default zfs_scan_vdev_limit to 16MB - -For HDD based pools the default zfs_scan_vdev_limit of 4M -per-vdev can significantly limit the maximum scrub performance. -Increasing the default to 16M can double the scrub speed from -80 MB/s per disk to 160 MB/s per disk. - -This does increase the memory footprint during scrub/resilver -but given the performance win this is a reasonable trade off. -Memory usage is capped at 1/4 of arc_c_max. Note that number -of outstanding I/Os has not changed and is still limited by -zfs_vdev_scrub_max_active. 
- -Reviewed-by: Akash B -Reviewed-by: Tony Nguyen -Reviewed-by: Alexander Motin -Updated-by: Aron Xu -Signed-off-by: Brian Behlendorf -Closes #14428 ---- - man/man4/zfs.4 | 2 +- - module/zfs/dsl_scan.c | 28 ++++++++++++++++------------ - 2 files changed, 17 insertions(+), 13 deletions(-) - -Index: zfs/module/zfs/dsl_scan.c -=================================================================== ---- zfs.orig/module/zfs/dsl_scan.c -+++ zfs/module/zfs/dsl_scan.c -@@ -37,6 +37,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -126,7 +127,7 @@ static boolean_t scan_ds_queue_contains( - static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); - static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); - static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); --static uint64_t dsl_scan_count_data_disks(vdev_t *vd); -+static uint64_t dsl_scan_count_data_disks(spa_t *spa); - - extern int zfs_vdev_async_write_active_min_dirty_percent; - static int zfs_scan_blkstats = 0; -@@ -147,7 +148,7 @@ int zfs_scan_strict_mem_lim = B_FALSE; - * overload the drives with I/O, since that is protected by - * zfs_vdev_scrub_max_active. - */ --unsigned long zfs_scan_vdev_limit = 4 << 20; -+unsigned long zfs_scan_vdev_limit = 16 << 20; - - int zfs_scan_issue_strategy = 0; - int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */ -@@ -450,11 +451,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t t - - /* - * Calculate the max number of in-flight bytes for pool-wide -- * scanning operations (minimum 1MB). Limits for the issuing -- * phase are done per top-level vdev and are handled separately. -+ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). -+ * Limits for the issuing phase are done per top-level vdev and -+ * are handled separately. 
- */ -- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * -- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); -+ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, -+ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); - - avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), - offsetof(scan_ds_t, sds_node)); -@@ -2782,8 +2784,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t - } - - static uint64_t --dsl_scan_count_data_disks(vdev_t *rvd) -+dsl_scan_count_data_disks(spa_t *spa) - { -+ vdev_t *rvd = spa->spa_root_vdev; - uint64_t i, leaves = 0; - - for (i = 0; i < rvd->vdev_children; i++) { -@@ -3678,12 +3681,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t * - taskqid_t prefetch_tqid; - - /* -- * Recalculate the max number of in-flight bytes for pool-wide -- * scanning operations (minimum 1MB). Limits for the issuing -- * phase are done per top-level vdev and are handled separately. -+ * Calculate the max number of in-flight bytes for pool-wide -+ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). -+ * Limits for the issuing phase are done per top-level vdev and -+ * are handled separately. - */ -- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * -- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); -+ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, -+ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); - - if (scnp->scn_ddt_bookmark.ddb_class <= - scnp->scn_ddt_class_max) { -Index: zfs/man/man4/zfs.4 -=================================================================== ---- zfs.orig/man/man4/zfs.4 -+++ zfs/man/man4/zfs.4 -@@ -1833,7 +1833,7 @@ When disabled, the memory limit may be e - Freezes a scrub/resilver in progress without actually pausing it. - Intended for testing/debugging. - . 
--.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int -+.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 16MB Pc Pq int - Maximum amount of data that can be concurrently issued at once for scrubs and - resilvers per leaf device, given in bytes. - . diff -Nru zfs-linux-2.1.9/debian/patches/0002-System-wide-speculative-prefetch-limit.patch zfs-linux-2.1.9/debian/patches/0002-System-wide-speculative-prefetch-limit.patch --- zfs-linux-2.1.9/debian/patches/0002-System-wide-speculative-prefetch-limit.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0002-System-wide-speculative-prefetch-limit.patch 2023-03-24 17:53:46.000000000 +0800 @@ -0,0 +1,158 @@ +From 9d2e5c14b2f94c91aa389799bd9e80e1098263e7 Mon Sep 17 00:00:00 2001 +From: Alexander Motin +Date: Wed, 1 Mar 2023 18:27:40 -0500 +Subject: [PATCH] System-wide speculative prefetch limit. + +With some pathological access patterns it is possible to make ZFS +accumulate almost unlimited amount of speculative prefetch ZIOs. +Combined with linear ABD allocations in RAIDZ code, it appears to +be possible to exhaust system KVA, triggering kernel panic. + +Address this by introducing a system-wide counter of active prefetch +requests and blocking prefetch distance doubling per stream hits if +the number of active requests is higher that ~6% of ARC size. + +Reviewed-by: Brian Behlendorf +Signed-off-by: Alexander Motin +Sponsored by: iXsystems, Inc. 
+Closes #14516 +--- + include/sys/arc_impl.h | 1 + + module/zfs/dmu_zfetch.c | 29 ++++++++++++++++++++++++----- + 2 files changed, 25 insertions(+), 5 deletions(-) + +diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h +index 43818d410..db6238fda 100644 +--- a/include/sys/arc_impl.h ++++ b/include/sys/arc_impl.h +@@ -30,6 +30,7 @@ + #define _SYS_ARC_IMPL_H + + #include ++#include + #include + #include + #include +diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c +index bca881d82..d2985d572 100644 +--- a/module/zfs/dmu_zfetch.c ++++ b/module/zfs/dmu_zfetch.c +@@ -28,6 +28,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -65,13 +66,15 @@ typedef struct zfetch_stats { + kstat_named_t zfetchstat_misses; + kstat_named_t zfetchstat_max_streams; + kstat_named_t zfetchstat_io_issued; ++ kstat_named_t zfetchstat_io_active; + } zfetch_stats_t; + + static zfetch_stats_t zfetch_stats = { + { "hits", KSTAT_DATA_UINT64 }, + { "misses", KSTAT_DATA_UINT64 }, + { "max_streams", KSTAT_DATA_UINT64 }, +- { "io_issued", KSTAT_DATA_UINT64 }, ++ { "io_issued", KSTAT_DATA_UINT64 }, ++ { "io_active", KSTAT_DATA_UINT64 }, + }; + + struct { +@@ -79,6 +82,7 @@ struct { + wmsum_t zfetchstat_misses; + wmsum_t zfetchstat_max_streams; + wmsum_t zfetchstat_io_issued; ++ aggsum_t zfetchstat_io_active; + } zfetch_sums; + + #define ZFETCHSTAT_BUMP(stat) \ +@@ -104,6 +108,8 @@ zfetch_kstats_update(kstat_t *ksp, int rw) + wmsum_value(&zfetch_sums.zfetchstat_max_streams); + zs->zfetchstat_io_issued.value.ui64 = + wmsum_value(&zfetch_sums.zfetchstat_io_issued); ++ zs->zfetchstat_io_active.value.ui64 = ++ aggsum_value(&zfetch_sums.zfetchstat_io_active); + return (0); + } + +@@ -114,6 +120,7 @@ zfetch_init(void) + wmsum_init(&zfetch_sums.zfetchstat_misses, 0); + wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0); + wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0); ++ aggsum_init(&zfetch_sums.zfetchstat_io_active, 0); + + zfetch_ksp = kstat_create("zfs", 0, 
"zfetchstats", "misc", + KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), +@@ -138,6 +145,8 @@ zfetch_fini(void) + wmsum_fini(&zfetch_sums.zfetchstat_misses); + wmsum_fini(&zfetch_sums.zfetchstat_max_streams); + wmsum_fini(&zfetch_sums.zfetchstat_io_issued); ++ ASSERT0(aggsum_value(&zfetch_sums.zfetchstat_io_active)); ++ aggsum_fini(&zfetch_sums.zfetchstat_io_active); + } + + /* +@@ -294,6 +303,7 @@ dmu_zfetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t io_issued) + zs->zs_more = B_TRUE; + if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0) + dmu_zfetch_stream_fini(zs); ++ aggsum_add(&zfetch_sums.zfetchstat_io_active, -1); + } + + /* +@@ -407,20 +417,28 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, + * Start prefetch from the demand access size (nblks). Double the + * distance every access up to zfetch_min_distance. After that only + * if needed increase the distance by 1/8 up to zfetch_max_distance. ++ * ++ * Don't double the distance beyond single block if we have more ++ * than ~6% of ARC held by active prefetches. It should help with ++ * getting out of RAM on some badly mispredicted read patterns. 
+ */ +- unsigned int nbytes = nblks << zf->zf_dnode->dn_datablkshift; ++ unsigned int dbs = zf->zf_dnode->dn_datablkshift; ++ unsigned int nbytes = nblks << dbs; + unsigned int pf_nblks; + if (fetch_data) { + if (unlikely(zs->zs_pf_dist < nbytes)) + zs->zs_pf_dist = nbytes; +- else if (zs->zs_pf_dist < zfetch_min_distance) ++ else if (zs->zs_pf_dist < zfetch_min_distance && ++ (zs->zs_pf_dist < (1 << dbs) || ++ aggsum_compare(&zfetch_sums.zfetchstat_io_active, ++ arc_c_max >> (4 + dbs)) < 0)) + zs->zs_pf_dist *= 2; + else if (zs->zs_more) + zs->zs_pf_dist += zs->zs_pf_dist / 8; + zs->zs_more = B_FALSE; + if (zs->zs_pf_dist > zfetch_max_distance) + zs->zs_pf_dist = zfetch_max_distance; +- pf_nblks = zs->zs_pf_dist >> zf->zf_dnode->dn_datablkshift; ++ pf_nblks = zs->zs_pf_dist >> dbs; + } else { + pf_nblks = 0; + } +@@ -439,7 +457,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, + zs->zs_ipf_dist *= 2; + if (zs->zs_ipf_dist > zfetch_max_idistance) + zs->zs_ipf_dist = zfetch_max_idistance; +- pf_nblks = zs->zs_ipf_dist >> zf->zf_dnode->dn_datablkshift; ++ pf_nblks = zs->zs_ipf_dist >> dbs; + if (zs->zs_ipf_start < zs->zs_pf_end) + zs->zs_ipf_start = zs->zs_pf_end; + if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks) +@@ -509,6 +527,7 @@ dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock) + dmu_zfetch_stream_fini(zs); + return; + } ++ aggsum_add(&zfetch_sums.zfetchstat_io_active, issued); + + if (!have_lock) + rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER); +-- +2.30.2 + diff -Nru zfs-linux-2.1.9/debian/patches/0003-Add-missing-increment-to-dsl_deadlist_move_bpobj.patch zfs-linux-2.1.9/debian/patches/0003-Add-missing-increment-to-dsl_deadlist_move_bpobj.patch --- zfs-linux-2.1.9/debian/patches/0003-Add-missing-increment-to-dsl_deadlist_move_bpobj.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0003-Add-missing-increment-to-dsl_deadlist_move_bpobj.patch 2023-03-24 17:56:46.000000000 +0800 @@ -0,0 +1,40 
@@ +From 6281b5c4882f12655c9485eebb681665b7422bef Mon Sep 17 00:00:00 2001 +From: Richard Yao +Date: Sat, 4 Mar 2023 18:42:01 -0500 +Subject: [PATCH] Add missing increment to dsl_deadlist_move_bpobj() + +dc5c8006f684b1df3f2d4b6b8c121447d2db0017 was recently merged to prefetch +up to 128 deadlists. Unfortunately, a loop was missing an increment, +such that it will prefetch all deadlists. The performance properties of +that patch probably should be re-evaluated. + +This was caught by CodeQL's cpp/constant-comparison check in an +experimental branch where I am testing the security-and-extended +queries. It complained about the `i < 128` part of the loop condition +always evaluating to the same thing. The standard CodeQL configuration +we use missed this because it does not include that check. + +Reviewed-by: Tino Reichardt +Reviewed-by: Alexander Motin +Signed-off-by: Richard Yao +Closes #14573 +--- + module/zfs/dsl_deadlist.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c +index 1b2d8b92f..d5fe2ee56 100644 +--- a/module/zfs/dsl_deadlist.c ++++ b/module/zfs/dsl_deadlist.c +@@ -936,7 +936,7 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, + * Prefetch up to 128 deadlists first and then more as we progress. + * The limit is a balance between ARC use and diminishing returns. 
+ */ +- for (pdle = dle, i = 0; pdle && i < 128; ) { ++ for (pdle = dle, i = 0; pdle && i < 128; i++) { + bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object); + pdle = AVL_NEXT(&dl->dl_tree, pdle); + } +-- +2.30.2 + diff -Nru zfs-linux-2.1.9/debian/patches/0003-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch zfs-linux-2.1.9/debian/patches/0003-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch --- zfs-linux-2.1.9/debian/patches/0003-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch 2023-02-25 13:00:17.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0003-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch 1970-01-01 08:00:00.000000000 +0800 @@ -1,107 +0,0 @@ -From 973934b965268b5333564dbdf4e76b34cc7e7b6f Mon Sep 17 00:00:00 2001 -From: Brian Behlendorf -Date: Tue, 24 Jan 2023 15:23:32 -0800 -Subject: [PATCH] Increase default zfs_rebuild_vdev_limit to 64MB - -When testing distributed rebuild performance with more capable -hardware it was observed than increasing the zfs_rebuild_vdev_limit -to 64M reduced the rebuild time by 17%. Beyond 64MB there was -some improvement (~2%) but it was not significant when weighed -against the increased memory usage. Memory usage is capped at 1/4 -of arc_c_max. - -Additionally, vr_bytes_inflight_max has been moved so it's updated -per-metaslab to allow the size to be adjust while a rebuild is -running. - -Reviewed-by: Akash B -Reviewed-by: Tony Nguyen -Reviewed-by: Alexander Motin -Updated-by: Aron Xu -Signed-off-by: Brian Behlendorf -Closes #14428 ---- - man/man4/zfs.4 | 2 +- - module/zfs/vdev_rebuild.c | 27 ++++++++++++++++++--------- - 2 files changed, 19 insertions(+), 10 deletions(-) - -Index: zfs/man/man4/zfs.4 -=================================================================== ---- zfs.orig/man/man4/zfs.4 -+++ zfs/man/man4/zfs.4 -@@ -1706,7 +1706,7 @@ completes in order to verify the checksu - resilvered. - This is enabled by default and strongly recommended. - . 
--.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong -+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 64MB Pc Pq ulong - Maximum amount of I/O that can be concurrently issued for a sequential - resilver per leaf device, given in bytes. - . -Index: zfs/module/zfs/vdev_rebuild.c -=================================================================== ---- zfs.orig/module/zfs/vdev_rebuild.c -+++ zfs/module/zfs/vdev_rebuild.c -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include - #include - - /* -@@ -116,13 +117,12 @@ unsigned long zfs_rebuild_max_segment = - * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep - * the queue depth short. - * -- * 32MB was selected as the default value to achieve good performance with -- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential -- * rebuild was unable to saturate all of the drives using smaller values. -- * With a value of 32MB the sequential resilver write rate was measured at -- * 800MB/s sustained while rebuilding to a distributed spare. -+ * 64MB was observed to deliver the best performance and set as the default. -+ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c) -+ * and a rebuild rate of 1.2GB/s was measured to the distribute spare. -+ * Smaller values were unable to fully saturate the available pool I/O. 
- */ --unsigned long zfs_rebuild_vdev_limit = 32 << 20; -+unsigned long zfs_rebuild_vdev_limit = 64 << 20; - - /* - * Automatically start a pool scrub when the last active sequential resilver -@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg) - { - vdev_t *vd = arg; - spa_t *spa = vd->vdev_spa; -+ vdev_t *rvd = spa->spa_root_vdev; - int error = 0; - - /* -@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg) - vr->vr_pass_bytes_scanned = 0; - vr->vr_pass_bytes_issued = 0; - -- vr->vr_bytes_inflight_max = MAX(1ULL << 20, -- zfs_rebuild_vdev_limit * vd->vdev_children); -- - uint64_t update_est_time = gethrtime(); - vdev_rebuild_update_bytes_est(vd, 0); - -@@ -805,6 +803,17 @@ vdev_rebuild_thread(void *arg) - vr->vr_scan_msp = msp; - - /* -+ * Calculate the max number of in-flight bytes for top-level -+ * vdev scanning operations (minimum 1MB, maximum 1/4 of -+ * arc_c_max shared by all top-level vdevs). Limits for the -+ * issuing phase are done per top-level vdev and are handled -+ * separately. -+ */ -+ uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1); -+ vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20, -+ zfs_rebuild_vdev_limit * vd->vdev_children)); -+ -+ /* - * Removal of vdevs from the vdev tree may eliminate the need - * for the rebuild, in which case it should be canceled. 
The - * vdev_rebuild_cancel_wanted flag is set until the sync task diff -Nru zfs-linux-2.1.9/debian/patches/0004-Increase-default-zfs_scan_vdev_limit-to-16MB.patch zfs-linux-2.1.9/debian/patches/0004-Increase-default-zfs_scan_vdev_limit-to-16MB.patch --- zfs-linux-2.1.9/debian/patches/0004-Increase-default-zfs_scan_vdev_limit-to-16MB.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0004-Increase-default-zfs_scan_vdev_limit-to-16MB.patch 2023-03-24 17:48:40.000000000 +0800 @@ -0,0 +1,117 @@ +From c0aea7cf4e86fc02db8046fbb3bca21a918053a2 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Tue, 24 Jan 2023 14:05:45 -0800 +Subject: [PATCH] Increase default zfs_scan_vdev_limit to 16MB + +For HDD based pools the default zfs_scan_vdev_limit of 4M +per-vdev can significantly limit the maximum scrub performance. +Increasing the default to 16M can double the scrub speed from +80 MB/s per disk to 160 MB/s per disk. + +This does increase the memory footprint during scrub/resilver +but given the performance win this is a reasonable trade off. +Memory usage is capped at 1/4 of arc_c_max. Note that number +of outstanding I/Os has not changed and is still limited by +zfs_vdev_scrub_max_active. 
+ +Reviewed-by: Akash B +Reviewed-by: Tony Nguyen +Reviewed-by: Alexander Motin +Updated-by: Aron Xu +Signed-off-by: Brian Behlendorf +Closes #14428 +--- + man/man4/zfs.4 | 2 +- + module/zfs/dsl_scan.c | 28 ++++++++++++++++------------ + 2 files changed, 17 insertions(+), 13 deletions(-) + +Index: zfs/module/zfs/dsl_scan.c +=================================================================== +--- zfs.orig/module/zfs/dsl_scan.c ++++ zfs/module/zfs/dsl_scan.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -126,7 +127,7 @@ static boolean_t scan_ds_queue_contains( + static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); + static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); + static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); +-static uint64_t dsl_scan_count_data_disks(vdev_t *vd); ++static uint64_t dsl_scan_count_data_disks(spa_t *spa); + + extern int zfs_vdev_async_write_active_min_dirty_percent; + static int zfs_scan_blkstats = 0; +@@ -147,7 +148,7 @@ int zfs_scan_strict_mem_lim = B_FALSE; + * overload the drives with I/O, since that is protected by + * zfs_vdev_scrub_max_active. + */ +-unsigned long zfs_scan_vdev_limit = 4 << 20; ++unsigned long zfs_scan_vdev_limit = 16 << 20; + + int zfs_scan_issue_strategy = 0; + int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */ +@@ -450,11 +451,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t t + + /* + * Calculate the max number of in-flight bytes for pool-wide +- * scanning operations (minimum 1MB). Limits for the issuing +- * phase are done per top-level vdev and are handled separately. ++ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). ++ * Limits for the issuing phase are done per top-level vdev and ++ * are handled separately. 
+ */ +- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * +- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); ++ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, ++ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); + + avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), + offsetof(scan_ds_t, sds_node)); +@@ -2782,8 +2784,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t + } + + static uint64_t +-dsl_scan_count_data_disks(vdev_t *rvd) ++dsl_scan_count_data_disks(spa_t *spa) + { ++ vdev_t *rvd = spa->spa_root_vdev; + uint64_t i, leaves = 0; + + for (i = 0; i < rvd->vdev_children; i++) { +@@ -3678,12 +3681,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t * + taskqid_t prefetch_tqid; + + /* +- * Recalculate the max number of in-flight bytes for pool-wide +- * scanning operations (minimum 1MB). Limits for the issuing +- * phase are done per top-level vdev and are handled separately. ++ * Calculate the max number of in-flight bytes for pool-wide ++ * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). ++ * Limits for the issuing phase are done per top-level vdev and ++ * are handled separately. + */ +- scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * +- dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); ++ scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, ++ zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); + + if (scnp->scn_ddt_bookmark.ddb_class <= + scnp->scn_ddt_class_max) { +Index: zfs/man/man4/zfs.4 +=================================================================== +--- zfs.orig/man/man4/zfs.4 ++++ zfs/man/man4/zfs.4 +@@ -1833,7 +1833,7 @@ When disabled, the memory limit may be e + Freezes a scrub/resilver in progress without actually pausing it. + Intended for testing/debugging. + . 
+-.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int ++.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16MB Pc Pq int + Maximum amount of data that can be concurrently issued at once for scrubs and + resilvers per leaf device, given in bytes. + . diff -Nru zfs-linux-2.1.9/debian/patches/0004-rootdelay-on-zfs-should-be-adaptive.patch zfs-linux-2.1.9/debian/patches/0004-rootdelay-on-zfs-should-be-adaptive.patch --- zfs-linux-2.1.9/debian/patches/0004-rootdelay-on-zfs-should-be-adaptive.patch 2023-02-25 12:48:30.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0004-rootdelay-on-zfs-should-be-adaptive.patch 1970-01-01 08:00:00.000000000 +0800 @@ -1,95 +0,0 @@ -From f18e083bf8ce0c0d1997002f9986122be6d4ebe8 Mon Sep 17 00:00:00 2001 -From: George Wilson -Date: Thu, 2 Feb 2023 18:11:35 -0500 -Subject: [PATCH] rootdelay on zfs should be adaptive - -The 'rootdelay' boot option currently pauses the boot for a specified -amount of time. The original intent was to ensure that slower -configurations would have ample time to enumerate the devices to make -importing the root pool successful. This, however, causes unnecessary -boot delay for environments like Azure which set this parameter by -default. - -This commit changes the initramfs logic to pause until it can -successfully load the 'zfs' module. The timeout specified by -'rootdelay' now becomes the maximum amount of time that initramfs will -wait before failing the boot. - -Reviewed-by: Brian Behlendorf -Reviewed-by: Prakash Surya -Signed-off-by: George Wilson -Closes #14430 ---- - contrib/initramfs/scripts/zfs | 54 +++++++++++++++++++++++------------ - 1 file changed, 35 insertions(+), 19 deletions(-) - -Index: zfs/contrib/initramfs/scripts/zfs -=================================================================== ---- zfs.orig/contrib/initramfs/scripts/zfs -+++ zfs/contrib/initramfs/scripts/zfs -@@ -270,30 +270,46 @@ import_pool() - # with more logging etc. 
- load_module_initrd() - { -- [ -n "$ROOTDELAY" ] && ZFS_INITRD_PRE_MOUNTROOT_SLEEP="$ROOTDELAY" -+ ZFS_INITRD_PRE_MOUNTROOT_SLEEP=${ROOTDELAY:-0} - -- if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ] 2>/dev/null -- then -- if [ "$quiet" != "y" ]; then -- zfs_log_begin_msg "Sleeping for" \ -- "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP seconds..." -- fi -- sleep "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -- [ "$quiet" != "y" ] && zfs_log_end_msg -+ if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then -+ [ "$quiet" != "y" ] && zfs_log_begin_msg "Delaying for up to '${ZFS_INITRD_PRE_MOUNTROOT_SLEEP}' seconds." - fi - -- # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear. -- if command -v wait_for_udev > /dev/null 2>&1 ; then -- wait_for_udev 10 -- elif command -v wait_for_dev > /dev/null 2>&1 ; then -- wait_for_dev -- fi -+ START=$(/bin/date -u +%s) -+ END=$((START+ZFS_INITRD_PRE_MOUNTROOT_SLEEP)) -+ while true; do -+ -+ # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear. -+ if command -v wait_for_udev > /dev/null 2>&1 ; then -+ wait_for_udev 10 -+ elif command -v wait_for_dev > /dev/null 2>&1 ; then -+ wait_for_dev -+ fi - -- # zpool import refuse to import without a valid /proc/self/mounts -- [ ! -f /proc/self/mounts ] && mount proc /proc -+ # -+ # zpool import refuse to import without a valid -+ # /proc/self/mounts -+ # -+ [ ! 
-f /proc/self/mounts ] && mount proc /proc -+ -+ # Load the module -+ if load_module "zfs"; then -+ ret=0 -+ break -+ else -+ ret=1 -+ fi -+ -+ [ "$(/bin/date -u +%s)" -gt "$END" ] && break -+ sleep 1 -+ -+ done -+ if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then -+ [ "$quiet" != "y" ] && zfs_log_end_msg -+ fi - -- # Load the module -- load_module "zfs" || return 1 -+ [ "$ret" -ne 0 ] && return 1 - - if [ "$ZFS_INITRD_POST_MODPROBE_SLEEP" -gt 0 ] 2>/dev/null - then diff -Nru zfs-linux-2.1.9/debian/patches/0005-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch zfs-linux-2.1.9/debian/patches/0005-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch --- zfs-linux-2.1.9/debian/patches/0005-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0005-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch 2023-03-24 17:48:40.000000000 +0800 @@ -0,0 +1,107 @@ +From 973934b965268b5333564dbdf4e76b34cc7e7b6f Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Tue, 24 Jan 2023 15:23:32 -0800 +Subject: [PATCH] Increase default zfs_rebuild_vdev_limit to 64MB + +When testing distributed rebuild performance with more capable +hardware it was observed than increasing the zfs_rebuild_vdev_limit +to 64M reduced the rebuild time by 17%. Beyond 64MB there was +some improvement (~2%) but it was not significant when weighed +against the increased memory usage. Memory usage is capped at 1/4 +of arc_c_max. + +Additionally, vr_bytes_inflight_max has been moved so it's updated +per-metaslab to allow the size to be adjust while a rebuild is +running. 
+ +Reviewed-by: Akash B +Reviewed-by: Tony Nguyen +Reviewed-by: Alexander Motin +Updated-by: Aron Xu +Signed-off-by: Brian Behlendorf +Closes #14428 +--- + man/man4/zfs.4 | 2 +- + module/zfs/vdev_rebuild.c | 27 ++++++++++++++++++--------- + 2 files changed, 19 insertions(+), 10 deletions(-) + +Index: zfs/man/man4/zfs.4 +=================================================================== +--- zfs.orig/man/man4/zfs.4 ++++ zfs/man/man4/zfs.4 +@@ -1706,7 +1706,7 @@ completes in order to verify the checksu + resilvered. + This is enabled by default and strongly recommended. + . +-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong ++.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64MB Pc Pq ulong + Maximum amount of I/O that can be concurrently issued for a sequential + resilver per leaf device, given in bytes. + . +Index: zfs/module/zfs/vdev_rebuild.c +=================================================================== +--- zfs.orig/module/zfs/vdev_rebuild.c ++++ zfs/module/zfs/vdev_rebuild.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -116,13 +117,12 @@ unsigned long zfs_rebuild_max_segment = + * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep + * the queue depth short. + * +- * 32MB was selected as the default value to achieve good performance with +- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential +- * rebuild was unable to saturate all of the drives using smaller values. +- * With a value of 32MB the sequential resilver write rate was measured at +- * 800MB/s sustained while rebuilding to a distributed spare. ++ * 64MB was observed to deliver the best performance and set as the default. ++ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c) ++ * and a rebuild rate of 1.2GB/s was measured to the distribute spare. ++ * Smaller values were unable to fully saturate the available pool I/O. 
+ */ +-unsigned long zfs_rebuild_vdev_limit = 32 << 20; ++unsigned long zfs_rebuild_vdev_limit = 64 << 20; + + /* + * Automatically start a pool scrub when the last active sequential resilver +@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg) + { + vdev_t *vd = arg; + spa_t *spa = vd->vdev_spa; ++ vdev_t *rvd = spa->spa_root_vdev; + int error = 0; + + /* +@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg) + vr->vr_pass_bytes_scanned = 0; + vr->vr_pass_bytes_issued = 0; + +- vr->vr_bytes_inflight_max = MAX(1ULL << 20, +- zfs_rebuild_vdev_limit * vd->vdev_children); +- + uint64_t update_est_time = gethrtime(); + vdev_rebuild_update_bytes_est(vd, 0); + +@@ -805,6 +803,17 @@ vdev_rebuild_thread(void *arg) + vr->vr_scan_msp = msp; + + /* ++ * Calculate the max number of in-flight bytes for top-level ++ * vdev scanning operations (minimum 1MB, maximum 1/4 of ++ * arc_c_max shared by all top-level vdevs). Limits for the ++ * issuing phase are done per top-level vdev and are handled ++ * separately. ++ */ ++ uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1); ++ vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20, ++ zfs_rebuild_vdev_limit * vd->vdev_children)); ++ ++ /* + * Removal of vdevs from the vdev tree may eliminate the need + * for the rebuild, in which case it should be canceled. 
The + * vdev_rebuild_cancel_wanted flag is set until the sync task diff -Nru zfs-linux-2.1.9/debian/patches/0005-initramfs-Make-mountpoint-none-work.patch zfs-linux-2.1.9/debian/patches/0005-initramfs-Make-mountpoint-none-work.patch --- zfs-linux-2.1.9/debian/patches/0005-initramfs-Make-mountpoint-none-work.patch 2023-02-25 13:06:33.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0005-initramfs-Make-mountpoint-none-work.patch 1970-01-01 08:00:00.000000000 +0800 @@ -1,34 +0,0 @@ -From 8473829d1f1f2d30d2629364204cda071749af60 Mon Sep 17 00:00:00 2001 -From: Ryan Moeller -Date: Mon, 6 Feb 2023 14:16:01 -0500 -Subject: [PATCH] initramfs: Make mountpoint=none work - -In initramfs, mount.zfs fails to mount a dataset with mountpoint=none, -but mount.zfs -o zfsutil works. Use -o zfsutil when mountpoint=none. - -Reviewed-by: Brian Behlendorf -Reviewed-by: Richard Yao -Signed-off-by: Ryan Moeller -Closes #14455 -(cherry picked from commit eb823cbc76d28a7cafdf6a7aafdefe7e74fe26bc) ---- - contrib/initramfs/scripts/zfs | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -Index: zfs/contrib/initramfs/scripts/zfs -=================================================================== ---- zfs.orig/contrib/initramfs/scripts/zfs -+++ zfs/contrib/initramfs/scripts/zfs -@@ -357,9 +357,11 @@ mount_fs() - # isn't the root fs. - return 0 - fi -- ZFS_CMD="mount.zfs" - # Last hail-mary: Hope 'rootmnt' is set! 
- mountpoint="" -+ if [ "$mountpoint" = "legacy" ]; then -+ ZFS_CMD="mount.zfs" -+ fi - else - mountpoint="$mountpoint1" - fi diff -Nru zfs-linux-2.1.9/debian/patches/0006-rootdelay-on-zfs-should-be-adaptive.patch zfs-linux-2.1.9/debian/patches/0006-rootdelay-on-zfs-should-be-adaptive.patch --- zfs-linux-2.1.9/debian/patches/0006-rootdelay-on-zfs-should-be-adaptive.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0006-rootdelay-on-zfs-should-be-adaptive.patch 2023-03-24 17:48:40.000000000 +0800 @@ -0,0 +1,95 @@ +From f18e083bf8ce0c0d1997002f9986122be6d4ebe8 Mon Sep 17 00:00:00 2001 +From: George Wilson +Date: Thu, 2 Feb 2023 18:11:35 -0500 +Subject: [PATCH] rootdelay on zfs should be adaptive + +The 'rootdelay' boot option currently pauses the boot for a specified +amount of time. The original intent was to ensure that slower +configurations would have ample time to enumerate the devices to make +importing the root pool successful. This, however, causes unnecessary +boot delay for environments like Azure which set this parameter by +default. + +This commit changes the initramfs logic to pause until it can +successfully load the 'zfs' module. The timeout specified by +'rootdelay' now becomes the maximum amount of time that initramfs will +wait before failing the boot. + +Reviewed-by: Brian Behlendorf +Reviewed-by: Prakash Surya +Signed-off-by: George Wilson +Closes #14430 +--- + contrib/initramfs/scripts/zfs | 54 +++++++++++++++++++++++------------ + 1 file changed, 35 insertions(+), 19 deletions(-) + +Index: zfs/contrib/initramfs/scripts/zfs +=================================================================== +--- zfs.orig/contrib/initramfs/scripts/zfs ++++ zfs/contrib/initramfs/scripts/zfs +@@ -270,30 +270,46 @@ import_pool() + # with more logging etc. 
+ load_module_initrd() + { +- [ -n "$ROOTDELAY" ] && ZFS_INITRD_PRE_MOUNTROOT_SLEEP="$ROOTDELAY" ++ ZFS_INITRD_PRE_MOUNTROOT_SLEEP=${ROOTDELAY:-0} + +- if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ] 2>/dev/null +- then +- if [ "$quiet" != "y" ]; then +- zfs_log_begin_msg "Sleeping for" \ +- "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP seconds..." +- fi +- sleep "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" +- [ "$quiet" != "y" ] && zfs_log_end_msg ++ if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then ++ [ "$quiet" != "y" ] && zfs_log_begin_msg "Delaying for up to '${ZFS_INITRD_PRE_MOUNTROOT_SLEEP}' seconds." + fi + +- # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear. +- if command -v wait_for_udev > /dev/null 2>&1 ; then +- wait_for_udev 10 +- elif command -v wait_for_dev > /dev/null 2>&1 ; then +- wait_for_dev +- fi ++ START=$(/bin/date -u +%s) ++ END=$((START+ZFS_INITRD_PRE_MOUNTROOT_SLEEP)) ++ while true; do ++ ++ # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear. ++ if command -v wait_for_udev > /dev/null 2>&1 ; then ++ wait_for_udev 10 ++ elif command -v wait_for_dev > /dev/null 2>&1 ; then ++ wait_for_dev ++ fi + +- # zpool import refuse to import without a valid /proc/self/mounts +- [ ! -f /proc/self/mounts ] && mount proc /proc ++ # ++ # zpool import refuse to import without a valid ++ # /proc/self/mounts ++ # ++ [ ! 
-f /proc/self/mounts ] && mount proc /proc ++ ++ # Load the module ++ if load_module "zfs"; then ++ ret=0 ++ break ++ else ++ ret=1 ++ fi ++ ++ [ "$(/bin/date -u +%s)" -gt "$END" ] && break ++ sleep 1 ++ ++ done ++ if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then ++ [ "$quiet" != "y" ] && zfs_log_end_msg ++ fi + +- # Load the module +- load_module "zfs" || return 1 ++ [ "$ret" -ne 0 ] && return 1 + + if [ "$ZFS_INITRD_POST_MODPROBE_SLEEP" -gt 0 ] 2>/dev/null + then diff -Nru zfs-linux-2.1.9/debian/patches/0006-zdb-zero-pad-checksum-output.patch zfs-linux-2.1.9/debian/patches/0006-zdb-zero-pad-checksum-output.patch --- zfs-linux-2.1.9/debian/patches/0006-zdb-zero-pad-checksum-output.patch 2023-02-25 13:06:37.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0006-zdb-zero-pad-checksum-output.patch 1970-01-01 08:00:00.000000000 +0800 @@ -1,40 +0,0 @@ -From ac7648179c856750b719c7a9e0464466df390b26 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Rob=20N=20=E2=98=85?= -Date: Wed, 8 Feb 2023 08:48:22 +1100 -Subject: [PATCH] zdb: zero-pad checksum output - -The leading zeroes are part of the checksum so we should show them. 
- -Reviewed-by: Richard Yao -Reviewed-by: Brian Behlendorf -Signed-off-by: Rob Norris -Closes #14464 ---- - cmd/zdb/zdb.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -Index: zfs/cmd/zdb/zdb.c -=================================================================== ---- zfs.orig/cmd/zdb/zdb.c -+++ zfs/cmd/zdb/zdb.c -@@ -2305,7 +2305,8 @@ snprintf_blkptr_compact(char *blkbuf, si - (void) snprintf(blkbuf + strlen(blkbuf), - buflen - strlen(blkbuf), " %s", "FREE"); - (void) snprintf(blkbuf + strlen(blkbuf), -- buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx", -+ buflen - strlen(blkbuf), -+ " cksum=%016llx:%016llx:%016llx:%016llx", - (u_longlong_t)bp->blk_cksum.zc_word[0], - (u_longlong_t)bp->blk_cksum.zc_word[1], - (u_longlong_t)bp->blk_cksum.zc_word[2], -@@ -8213,7 +8214,9 @@ zdb_read_block(char *thing, spa_t *spa) - DVA_GET_OFFSET(&bp->blk_dva[0]); - ck_zio->io_bp = bp; - zio_checksum_compute(ck_zio, ck, pabd, lsize); -- printf("%12s\tcksum=%llx:%llx:%llx:%llx\n", -+ printf( -+ "%12s\t" -+ "cksum=%016llx:%016llx:%016llx:%016llx\n", - zio_checksum_table[ck].ci_name, - (u_longlong_t)bp->blk_cksum.zc_word[0], - (u_longlong_t)bp->blk_cksum.zc_word[1], diff -Nru zfs-linux-2.1.9/debian/patches/0007-initramfs-Make-mountpoint-none-work.patch zfs-linux-2.1.9/debian/patches/0007-initramfs-Make-mountpoint-none-work.patch --- zfs-linux-2.1.9/debian/patches/0007-initramfs-Make-mountpoint-none-work.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0007-initramfs-Make-mountpoint-none-work.patch 2023-03-24 17:48:40.000000000 +0800 @@ -0,0 +1,34 @@ +From 8473829d1f1f2d30d2629364204cda071749af60 Mon Sep 17 00:00:00 2001 +From: Ryan Moeller +Date: Mon, 6 Feb 2023 14:16:01 -0500 +Subject: [PATCH] initramfs: Make mountpoint=none work + +In initramfs, mount.zfs fails to mount a dataset with mountpoint=none, +but mount.zfs -o zfsutil works. Use -o zfsutil when mountpoint=none. 
+ +Reviewed-by: Brian Behlendorf +Reviewed-by: Richard Yao +Signed-off-by: Ryan Moeller +Closes #14455 +(cherry picked from commit eb823cbc76d28a7cafdf6a7aafdefe7e74fe26bc) +--- + contrib/initramfs/scripts/zfs | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +Index: zfs/contrib/initramfs/scripts/zfs +=================================================================== +--- zfs.orig/contrib/initramfs/scripts/zfs ++++ zfs/contrib/initramfs/scripts/zfs +@@ -357,9 +357,11 @@ mount_fs() + # isn't the root fs. + return 0 + fi +- ZFS_CMD="mount.zfs" + # Last hail-mary: Hope 'rootmnt' is set! + mountpoint="" ++ if [ "$mountpoint" = "legacy" ]; then ++ ZFS_CMD="mount.zfs" ++ fi + else + mountpoint="$mountpoint1" + fi diff -Nru zfs-linux-2.1.9/debian/patches/0007-zdb-zero-pad-checksum-output-follow-up.patch zfs-linux-2.1.9/debian/patches/0007-zdb-zero-pad-checksum-output-follow-up.patch --- zfs-linux-2.1.9/debian/patches/0007-zdb-zero-pad-checksum-output-follow-up.patch 2023-02-26 12:32:47.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0007-zdb-zero-pad-checksum-output-follow-up.patch 1970-01-01 08:00:00.000000000 +0800 @@ -1,34 +0,0 @@ -From 57cfae4a2f04aaff10c45b3f7975e0fe3ef3e8b8 Mon Sep 17 00:00:00 2001 -From: Brian Behlendorf -Date: Wed, 15 Feb 2023 09:06:29 -0800 -Subject: [PATCH 01/13] zdb: zero-pad checksum output follow up - -Apply zero padding for checksums consistently. The SNPRINTF_BLKPTR -macro was not updated in commit ac7648179c8 which results in the -`cli_root/zdb/zdb_checksum.ksh` test case reliably failing. 
- -Reviewed-by: Igor Kozhukhov -Reviewed-by: Akash B -Reviewed-by: Brian Atkinson -Signed-off-by: Brian Behlendorf -Closes #14497 ---- - include/sys/spa.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/sys/spa.h b/include/sys/spa.h -index 500eb3491..c9d03bf64 100644 ---- a/include/sys/spa.h -+++ b/include/sys/spa.h -@@ -678,7 +678,7 @@ typedef struct blkptr { - len += func(buf + len, size - len, \ - "[L%llu %s] %s %s %s %s %s %s %s%c" \ - "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ -- "cksum=%llx:%llx:%llx:%llx", \ -+ "cksum=%016llx:%016llx:%016llx:%016llx", \ - (u_longlong_t)BP_GET_LEVEL(bp), \ - type, \ - checksum, \ --- -2.39.2 - diff -Nru zfs-linux-2.1.9/debian/patches/0008-initramfs-fix-zpool-get-argument-order.patch zfs-linux-2.1.9/debian/patches/0008-initramfs-fix-zpool-get-argument-order.patch --- zfs-linux-2.1.9/debian/patches/0008-initramfs-fix-zpool-get-argument-order.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0008-initramfs-fix-zpool-get-argument-order.patch 2023-03-25 23:00:35.000000000 +0800 @@ -0,0 +1,50 @@ +From 050be02c02f166b12fa80e4db998818b077c41e9 Mon Sep 17 00:00:00 2001 +From: q66 +Date: Tue, 7 Mar 2023 02:07:01 +0100 +Subject: [PATCH] initramfs: fix zpool get argument order + +When using the zfs initramfs scripts on my system, I get various +errors at initramfs stage, such as: + +cannot open '-o': name must begin with a letter + +My zfs binaries are compiled with musl libc, which may be why +this happens. In any case, fix the argument order to make the +zpool binary happy, and to match its --help output. 
+ +Reviewed-by: Brian Behlendorf +Reviewed-by: Richard Yao +Signed-off-by: Daniel Kolesa +Closes #14572 +--- + contrib/initramfs/scripts/zfs | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +Index: zfs/contrib/initramfs/scripts/zfs +=================================================================== +--- zfs.orig/contrib/initramfs/scripts/zfs ++++ zfs/contrib/initramfs/scripts/zfs +@@ -192,7 +192,7 @@ import_pool() + + # Verify that the pool isn't already imported + # Make as sure as we can to not require '-f' to import. +- "${ZPOOL}" get name,guid -o value -H 2>/dev/null | grep -Fxq "$pool" && return 0 ++ "${ZPOOL}" get -H -o value name,guid 2>/dev/null | grep -Fxq "$pool" && return 0 + + # For backwards compatibility, make sure that ZPOOL_IMPORT_PATH is set + # to something we can use later with the real import(s). We want to +@@ -898,12 +898,12 @@ mountroot() + fi + + # In case the pool was specified as guid, resolve guid to name +- pool="$("${ZPOOL}" get name,guid -o name,value -H | \ ++ pool="$("${ZPOOL}" get -H -o name,value name,guid | \ + awk -v pool="${ZFS_RPOOL}" '$2 == pool { print $1 }')" + if [ -n "$pool" ]; then + # If $ZFS_BOOTFS contains guid, replace the guid portion with $pool + ZFS_BOOTFS=$(echo "$ZFS_BOOTFS" | \ +- sed -e "s/$("${ZPOOL}" get guid -o value "$pool" -H)/$pool/g") ++ sed -e "s/$("${ZPOOL}" get -H -o value guid "$pool")/$pool/g") + ZFS_RPOOL="${pool}" + fi + diff -Nru zfs-linux-2.1.9/debian/patches/0009-zdb-zero-pad-checksum-output.patch zfs-linux-2.1.9/debian/patches/0009-zdb-zero-pad-checksum-output.patch --- zfs-linux-2.1.9/debian/patches/0009-zdb-zero-pad-checksum-output.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0009-zdb-zero-pad-checksum-output.patch 2023-03-24 17:48:40.000000000 +0800 @@ -0,0 +1,40 @@ +From ac7648179c856750b719c7a9e0464466df390b26 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Rob=20N=20=E2=98=85?= +Date: Wed, 8 Feb 2023 08:48:22 +1100 +Subject: [PATCH] zdb: zero-pad 
checksum output + +The leading zeroes are part of the checksum so we should show them. + +Reviewed-by: Richard Yao +Reviewed-by: Brian Behlendorf +Signed-off-by: Rob Norris +Closes #14464 +--- + cmd/zdb/zdb.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +Index: zfs/cmd/zdb/zdb.c +=================================================================== +--- zfs.orig/cmd/zdb/zdb.c ++++ zfs/cmd/zdb/zdb.c +@@ -2305,7 +2305,8 @@ snprintf_blkptr_compact(char *blkbuf, si + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), " %s", "FREE"); + (void) snprintf(blkbuf + strlen(blkbuf), +- buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx", ++ buflen - strlen(blkbuf), ++ " cksum=%016llx:%016llx:%016llx:%016llx", + (u_longlong_t)bp->blk_cksum.zc_word[0], + (u_longlong_t)bp->blk_cksum.zc_word[1], + (u_longlong_t)bp->blk_cksum.zc_word[2], +@@ -8213,7 +8214,9 @@ zdb_read_block(char *thing, spa_t *spa) + DVA_GET_OFFSET(&bp->blk_dva[0]); + ck_zio->io_bp = bp; + zio_checksum_compute(ck_zio, ck, pabd, lsize); +- printf("%12s\tcksum=%llx:%llx:%llx:%llx\n", ++ printf( ++ "%12s\t" ++ "cksum=%016llx:%016llx:%016llx:%016llx\n", + zio_checksum_table[ck].ci_name, + (u_longlong_t)bp->blk_cksum.zc_word[0], + (u_longlong_t)bp->blk_cksum.zc_word[1], diff -Nru zfs-linux-2.1.9/debian/patches/0010-zdb-zero-pad-checksum-output-follow-up.patch zfs-linux-2.1.9/debian/patches/0010-zdb-zero-pad-checksum-output-follow-up.patch --- zfs-linux-2.1.9/debian/patches/0010-zdb-zero-pad-checksum-output-follow-up.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0010-zdb-zero-pad-checksum-output-follow-up.patch 2023-03-24 17:48:40.000000000 +0800 @@ -0,0 +1,34 @@ +From 57cfae4a2f04aaff10c45b3f7975e0fe3ef3e8b8 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Wed, 15 Feb 2023 09:06:29 -0800 +Subject: [PATCH 01/13] zdb: zero-pad checksum output follow up + +Apply zero padding for checksums consistently. 
The SNPRINTF_BLKPTR +macro was not updated in commit ac7648179c8 which results in the +`cli_root/zdb/zdb_checksum.ksh` test case reliably failing. + +Reviewed-by: Igor Kozhukhov +Reviewed-by: Akash B +Reviewed-by: Brian Atkinson +Signed-off-by: Brian Behlendorf +Closes #14497 +--- + include/sys/spa.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/sys/spa.h b/include/sys/spa.h +index 500eb3491..c9d03bf64 100644 +--- a/include/sys/spa.h ++++ b/include/sys/spa.h +@@ -678,7 +678,7 @@ typedef struct blkptr { + len += func(buf + len, size - len, \ + "[L%llu %s] %s %s %s %s %s %s %s%c" \ + "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ +- "cksum=%llx:%llx:%llx:%llx", \ ++ "cksum=%016llx:%016llx:%016llx:%016llx", \ + (u_longlong_t)BP_GET_LEVEL(bp), \ + type, \ + checksum, \ +-- +2.39.2 + diff -Nru zfs-linux-2.1.9/debian/patches/0011-Fix-for-mountpoint-legacy.patch zfs-linux-2.1.9/debian/patches/0011-Fix-for-mountpoint-legacy.patch --- zfs-linux-2.1.9/debian/patches/0011-Fix-for-mountpoint-legacy.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0011-Fix-for-mountpoint-legacy.patch 2023-03-25 23:00:44.000000000 +0800 @@ -0,0 +1,35 @@ +From a5c469c5f380b09705ad0bee15e2ca7a5f78213c Mon Sep 17 00:00:00 2001 +From: ofthesun9 +Date: Wed, 15 Mar 2023 00:40:55 +0100 +Subject: [PATCH 1/2] Fix for mountpoint=legacy + +We need to clear mountpoint only after checking it. + +Reviewed-by: Brian Behlendorf +Reviewed-by: Richard Yao +Signed-off-by: ofthesun9 +Closes #14599 +Closes #14604 +--- + contrib/initramfs/scripts/zfs | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +Index: zfs/contrib/initramfs/scripts/zfs +=================================================================== +--- zfs.orig/contrib/initramfs/scripts/zfs ++++ zfs/contrib/initramfs/scripts/zfs +@@ -357,11 +357,12 @@ mount_fs() + # isn't the root fs. + return 0 + fi +- # Last hail-mary: Hope 'rootmnt' is set! 
+- mountpoint="" ++ # Don't use mount.zfs -o zfsutils for legacy mountpoint + if [ "$mountpoint" = "legacy" ]; then + ZFS_CMD="mount.zfs" + fi ++ # Last hail-mary: Hope 'rootmnt' is set! ++ mountpoint="" + else + mountpoint="$mountpoint1" + fi diff -Nru zfs-linux-2.1.9/debian/patches/0012-QAT-Fix-uninitialized-seed-in-QAT-compression.patch zfs-linux-2.1.9/debian/patches/0012-QAT-Fix-uninitialized-seed-in-QAT-compression.patch --- zfs-linux-2.1.9/debian/patches/0012-QAT-Fix-uninitialized-seed-in-QAT-compression.patch 1970-01-01 08:00:00.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/0012-QAT-Fix-uninitialized-seed-in-QAT-compression.patch 2023-03-25 22:59:09.000000000 +0800 @@ -0,0 +1,37 @@ +From 345f8beb583534ba1f9afcc027437a2e9ba3c875 Mon Sep 17 00:00:00 2001 +From: naivekun +Date: Fri, 17 Mar 2023 02:54:10 +0800 +Subject: [PATCH 2/2] QAT: Fix uninitialized seed in QAT compression + +CpaDcRqResults have to be initialized with checksum=1 for adler32. +Otherwise when error CPA_DC_OVERFLOW occurred, the next compress +operation will continue on previously part-compressed data, and write +invalid checksum data. 
When zfs decompress the compressed data, a +invalid checksum will occurred and lead to #14463 + +Reviewed-by: Tino Reichardt +Reviewed-by: Weigang Li +Reviewed-by: Chengfei Zhu +Signed-off-by: naivekun +Closes #14632 +Closes #14463 +--- + module/os/linux/zfs/qat_compress.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/module/os/linux/zfs/qat_compress.c b/module/os/linux/zfs/qat_compress.c +index 1d099c95b..64e19e037 100644 +--- a/module/os/linux/zfs/qat_compress.c ++++ b/module/os/linux/zfs/qat_compress.c +@@ -247,7 +247,7 @@ qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, + Cpa8U *buffer_meta_src = NULL; + Cpa8U *buffer_meta_dst = NULL; + Cpa32U buffer_meta_size = 0; +- CpaDcRqResults dc_results; ++ CpaDcRqResults dc_results = {.checksum = 1}; + CpaStatus status = CPA_STATUS_FAIL; + Cpa32U hdr_sz = 0; + Cpa32U compressed_sz; +-- +2.30.2 + diff -Nru zfs-linux-2.1.9/debian/patches/series zfs-linux-2.1.9/debian/patches/series --- zfs-linux-2.1.9/debian/patches/series 2023-02-26 12:32:47.000000000 +0800 +++ zfs-linux-2.1.9/debian/patches/series 2023-03-25 23:04:29.000000000 +0800 @@ -24,9 +24,14 @@ #ubuntu/4510-silently-ignore-modprobe-failure.patch #ubuntu/4751-suppress-types.patch 0001-Prefetch-on-deadlists-merge.patch -0002-Increase-default-zfs_scan_vdev_limit-to-16MB.patch -0003-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch -0004-rootdelay-on-zfs-should-be-adaptive.patch -0005-initramfs-Make-mountpoint-none-work.patch -0006-zdb-zero-pad-checksum-output.patch -0007-zdb-zero-pad-checksum-output-follow-up.patch +0002-System-wide-speculative-prefetch-limit.patch +0003-Add-missing-increment-to-dsl_deadlist_move_bpobj.patch +0004-Increase-default-zfs_scan_vdev_limit-to-16MB.patch +0005-Increase-default-zfs_rebuild_vdev_limit-to-64MB.patch +0006-rootdelay-on-zfs-should-be-adaptive.patch +0007-initramfs-Make-mountpoint-none-work.patch +0008-initramfs-fix-zpool-get-argument-order.patch 
+0009-zdb-zero-pad-checksum-output.patch +0010-zdb-zero-pad-checksum-output-follow-up.patch +0011-Fix-for-mountpoint-legacy.patch +0012-QAT-Fix-uninitialized-seed-in-QAT-compression.patch