During disk revalidation done with sd_revalidate(), the zones of a
zoned disk are checked using the helper function
blk_revalidate_disk_zones() if a zone configuration change is detected
(change in the number of zones or zone size). The function
blk_revalidate_disk_zones() issues report_zones calls that are very
large, that is, to obtain zone information for all zones of the disk
with a single command. The size of the report zones command buffer
necessary for such a large request is generally lower than the disk
max_hw_sectors and KMALLOC_MAX_SIZE (4MB) but still very large (e.g.
about 3.5MB for a 15TB disk with 256MB zones). This large report zones
reply buffer allocation with kmalloc succeeds on boot, but frequently
fails at run time, especially for a system under memory pressure. This
causes the disk revalidation to fail and the disk capacity to be
changed to 0.

This problem can be avoided with a more intelligent report zones buffer
allocation. This patch introduces the arbitrary SD_ZBC_REPORT_SIZE
allocation limit of 1MB, which fits 16383 zone descriptors for every
report zones command execution, thus allowing a full zone report with 4
or 5 commands for most ZBC/ZAC disks today. This limit may be lowered to
satisfy the disk max_hw_sectors limit. In addition, further reduce the
likelihood of a buffer allocation failure while guaranteeing progress
in the zone report by retrying the buffer allocation with a smaller
size in case kmalloc() fails.

Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation")
Fixes: e76239a3748c ("block: add a report_zones method")
Cc: sta...@vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lem...@wdc.com>
---
 drivers/scsi/sd_zbc.c | 54 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 7334024b64f1..37469d77264e 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -103,6 +103,44 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, 
unsigned char *buf,
        return 0;
 }
 
+/**
+ * Arbitrary maximum report zones buffer size of 1MB, fitting 16383 x 64B zone
+ * descriptors plus the 64B report header.
+ */
+#define SD_ZBC_REPORT_SIZE (16384U * 64U)
+
+/**
+ * Allocate a page-backed report zones buffer; *buflen is set to its size.
+ */
+static void *sd_zbc_alloc_report_buffer(struct gendisk *disk, size_t *buflen,
+                                       gfp_t gfp_mask)
+{
+       struct page *page;
+       size_t bufsize;
+       int order;
+
+       /*
+        * Cap the requested size at SD_ZBC_REPORT_SIZE and at the queue
+        * max_hw_sectors limit (in bytes), then try page allocations of
+        * decreasing order until one succeeds, so that an allocation failure
+        * shrinks the report buffer instead of failing the report outright.
+        * NOTE(review): get_order() rounds up, so the *buflen returned below
+        * may exceed the max_hw_sectors cap computed here - confirm callers.
+        */
+       bufsize = min_t(size_t, *buflen, SD_ZBC_REPORT_SIZE);
+       bufsize = min_t(size_t, bufsize,
+                       queue_max_hw_sectors(disk->queue) << 9);
+       for (order = get_order(bufsize); order >= 0; order--) {
+               page = alloc_pages(gfp_mask, order);
+               if (page) {
+                       *buflen = PAGE_SIZE << order;
+                       return page_address(page);
+               }
+       }
+
+       return NULL;
+}
+
 /**
  * sd_zbc_report_zones - Disk report zones operation.
  * @disk: The target disk
@@ -118,9 +156,9 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t 
sector,
                        gfp_t gfp_mask)
 {
        struct scsi_disk *sdkp = scsi_disk(disk);
-       unsigned int i, buflen, nrz = *nr_zones;
+       unsigned int i, nrz = *nr_zones;
        unsigned char *buf;
-       size_t offset = 0;
+       size_t buflen, offset = 0;
        int ret = 0;
 
        if (!sd_is_zoned(sdkp))
@@ -128,13 +166,11 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t 
sector,
                return -EOPNOTSUPP;
 
        /*
-        * Get a reply buffer for the number of requested zones plus a header,
-        * without exceeding the device maximum command size. For ATA disks,
-        * buffers must be aligned to 512B.
+        * Try to get a buffer that can fit the requested number of zones plus
+        * the command reply header, all 64B in size.
         */
-       buflen = min(queue_max_hw_sectors(disk->queue) << 9,
-                    roundup((nrz + 1) * 64, 512));
-       buf = kmalloc(buflen, gfp_mask);
+       buflen = (nrz + 1) * 64;
+       buf = sd_zbc_alloc_report_buffer(disk, &buflen, gfp_mask);
        if (!buf)
                return -ENOMEM;
 
@@ -153,7 +189,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t 
sector,
        *nr_zones = nrz;
 
 out_free_buf:
-       kfree(buf);
+       free_pages((unsigned long)buf, get_order(buflen));
 
        return ret;
 }
-- 
2.21.0

Reply via email to