Add PR support for libmultipath through the addition of the nvme_mpath_pr_ops structure.
The callbacks here pass mpath_device pointers. These can be converted to NS pointers. However, the current PR callbacks for nvme_pr_ops are passed a bdev, and this helps us figure out whether we are dealing with a multipath head or an NS. Later the send command helpers can be changed to work per NS, when the full change to libmultipath happens. Until then, have separate per-NS command send helpers. The original PR callback functions from nvme_pr_ops can also be refactored to use the new NS-based callbacks then, reducing duplication. The new NS-based helpers are marked as __maybe_unused until the switch to libmultipath happens. Signed-off-by: John Garry <[email protected]> --- drivers/nvme/host/multipath.c | 1 + drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pr.c | 314 ++++++++++++++++++++++++++++++++++ 3 files changed, 316 insertions(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 6cadbc0449d3d..ac75db92dd124 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -1501,6 +1501,7 @@ static const struct mpath_head_template mpdt = { .get_access_state = nvme_mpath_get_access_state, .bdev_ioctl = nvme_mpath_bdev_ioctl, .cdev_ioctl = nvme_mpath_cdev_ioctl, + .pr_ops = &nvme_mpath_pr_ops, .chr_uring_cmd = nvme_mpath_chr_uring_cmd, .chr_uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll, .get_iopolicy = nvme_mpath_get_iopolicy, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index da9bd1ada6ad6..619d2fff969e3 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -22,6 +22,7 @@ #include <trace/events/block.h> extern const struct pr_ops nvme_pr_ops; +extern const struct mpath_pr_ops nvme_mpath_pr_ops; extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index ad2ecc2f49a97..fd5a9f309a56f 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -116,6 +116,51 @@ static int 
nvme_send_pr_command(struct block_device *bdev, u32 cdw10, u32 cdw11, return ret < 0 ? ret : nvme_status_to_pr_err(ret); } +static int __nvme_send_pr_command_ns(struct nvme_ns *ns, u32 cdw10, + u32 cdw11, u8 op, void *data, unsigned int data_len) +{ + struct nvme_command c = { 0 }; + + c.common.opcode = op; + c.common.cdw10 = cpu_to_le32(cdw10); + c.common.cdw11 = cpu_to_le32(cdw11); + + return nvme_send_ns_pr_command(ns, &c, data, data_len); +} + +static int nvme_send_pr_command_ns(struct nvme_ns *ns, u32 cdw10, u32 cdw11, + u8 op, void *data, unsigned int data_len) +{ + int ret; + + ret = __nvme_send_pr_command_ns(ns, cdw10, cdw11, op, data, data_len); + return ret < 0 ? ret : nvme_status_to_pr_err(ret); +} + +__maybe_unused +static int nvme_pr_register_ns(struct nvme_ns *ns, u64 old_key, u64 new_key, + u32 flags) +{ + struct nvmet_pr_register_data data = { 0 }; + u32 cdw10; + int ret; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + data.crkey = cpu_to_le64(old_key); + data.nrkey = cpu_to_le64(new_key); + + cdw10 = old_key ? NVME_PR_REGISTER_ACT_REPLACE : + NVME_PR_REGISTER_ACT_REG; + cdw10 |= (flags & PR_FL_IGNORE_KEY) ? NVME_PR_IGNORE_KEY : 0; + cdw10 |= NVME_PR_CPTPL_PERSIST; + + ret = nvme_send_pr_command_ns(ns, cdw10, 0, nvme_cmd_resv_register, + &data, sizeof(data)); + return ret; +} + static int nvme_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, unsigned int flags) { @@ -137,6 +182,26 @@ static int nvme_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, &data, sizeof(data)); } +__maybe_unused +static int nvme_pr_reserve_ns(struct nvme_ns *ns, u64 key, enum pr_type type, + u32 flags) +{ + struct nvmet_pr_acquire_data data = { 0 }; + u32 cdw10; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + data.crkey = cpu_to_le64(key); + + cdw10 = NVME_PR_ACQUIRE_ACT_ACQUIRE; + cdw10 |= nvme_pr_type_from_blk(type) << 8; + cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 
NVME_PR_IGNORE_KEY : 0; + + return nvme_send_pr_command_ns(ns, cdw10, 0, nvme_cmd_resv_acquire, + &data, sizeof(data)); +} + static int nvme_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, unsigned flags) { @@ -156,6 +221,24 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, &data, sizeof(data)); } +__maybe_unused +static int nvme_pr_preempt_ns(struct nvme_ns *ns, u64 old, u64 new, + enum pr_type type, bool abort) +{ + struct nvmet_pr_acquire_data data = { 0 }; + u32 cdw10; + + data.crkey = cpu_to_le64(old); + data.prkey = cpu_to_le64(new); + + cdw10 = abort ? NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT : + NVME_PR_ACQUIRE_ACT_PREEMPT; + cdw10 |= nvme_pr_type_from_blk(type) << 8; + + return nvme_send_pr_command_ns(ns, cdw10, 0, nvme_cmd_resv_acquire, + &data, sizeof(data)); +} + static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { @@ -173,6 +256,21 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, &data, sizeof(data)); } +__maybe_unused +static int nvme_pr_clear_ns(struct nvme_ns *ns, u64 key) +{ + struct nvmet_pr_release_data data = { 0 }; + u32 cdw10; + + data.crkey = cpu_to_le64(key); + + cdw10 = NVME_PR_RELEASE_ACT_CLEAR; + cdw10 |= key ? 0 : NVME_PR_IGNORE_KEY; + + return nvme_send_pr_command_ns(ns, cdw10, 0, nvme_cmd_resv_release, + &data, sizeof(data)); +} + static int nvme_pr_clear(struct block_device *bdev, u64 key) { struct nvmet_pr_release_data data = { 0 }; @@ -202,6 +300,45 @@ static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type &data, sizeof(data)); } +__maybe_unused +static int nvme_pr_release_ns(struct nvme_ns *ns, u64 key, enum pr_type type) +{ + struct nvmet_pr_release_data data = { 0 }; + u32 cdw10; + + data.crkey = cpu_to_le64(key); + + cdw10 = NVME_PR_RELEASE_ACT_RELEASE; + cdw10 |= nvme_pr_type_from_blk(type) << 8; + cdw10 |= key ? 
0 : NVME_PR_IGNORE_KEY; + + return nvme_send_pr_command_ns(ns, cdw10, 0, nvme_cmd_resv_release, + &data, sizeof(data)); +} + +static int nvme_mpath_pr_resv_report_ns(struct nvme_ns *ns, void *data, + u32 data_len, bool *eds) +{ + u32 cdw10, cdw11; + int ret; + + cdw10 = nvme_bytes_to_numd(data_len); + cdw11 = NVME_EXTENDED_DATA_STRUCT; + *eds = true; + +retry: + ret = __nvme_send_pr_command_ns(ns, cdw10, cdw11, nvme_cmd_resv_report, + data, data_len); + if (ret == NVME_SC_HOST_ID_INCONSIST && + cdw11 == NVME_EXTENDED_DATA_STRUCT) { + cdw11 = 0; + *eds = false; + goto retry; + } + + return ret < 0 ? ret : nvme_status_to_pr_err(ret); +} + static int nvme_pr_resv_report(struct block_device *bdev, void *data, u32 data_len, bool *eds) { @@ -225,6 +362,52 @@ static int nvme_pr_resv_report(struct block_device *bdev, void *data, return ret < 0 ? ret : nvme_status_to_pr_err(ret); } +__maybe_unused +static int nvme_pr_read_keys_ns(struct nvme_ns *ns, struct pr_keys *keys_info) +{ + size_t rse_len; + u32 num_keys = keys_info->num_keys; + struct nvme_reservation_status_ext *rse; + int ret, i; + bool eds; + + /* + * Assume we are using 128-bit host IDs and allocate a buffer large + * enough to get enough keys to fill the return keys buffer. 
+ */ + rse_len = struct_size(rse, regctl_eds, num_keys); + if (rse_len > U32_MAX) + return -EINVAL; + + rse = kzalloc(rse_len, GFP_KERNEL); + if (!rse) + return -ENOMEM; + + ret = nvme_mpath_pr_resv_report_ns(ns, rse, rse_len, &eds); + if (ret) + goto free_rse; + + keys_info->generation = le32_to_cpu(rse->gen); + keys_info->num_keys = get_unaligned_le16(&rse->regctl); + + num_keys = min(num_keys, keys_info->num_keys); + for (i = 0; i < num_keys; i++) { + if (eds) { + keys_info->keys[i] = + le64_to_cpu(rse->regctl_eds[i].rkey); + } else { + struct nvme_reservation_status *rs; + + rs = (struct nvme_reservation_status *)rse; + keys_info->keys[i] = le64_to_cpu(rs->regctl_ds[i].rkey); + } + } + +free_rse: + kfree(rse); + return ret; +} + static int nvme_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) { @@ -271,6 +454,70 @@ static int nvme_pr_read_keys(struct block_device *bdev, return ret; } +__maybe_unused +static int nvme_pr_read_reservation_ns(struct nvme_ns *ns, + struct pr_held_reservation *resv) +{ + struct nvme_reservation_status_ext tmp_rse, *rse; + int ret, i, num_regs; + u32 rse_len; + bool eds; + +get_num_regs: + /* + * Get the number of registrations so we know how big to allocate + * the response buffer. 
+ */ + ret = nvme_mpath_pr_resv_report_ns(ns, &tmp_rse, sizeof(tmp_rse), + &eds); + if (ret) + return ret; + + num_regs = get_unaligned_le16(&tmp_rse.regctl); + if (!num_regs) { + resv->generation = le32_to_cpu(tmp_rse.gen); + return 0; + } + + rse_len = struct_size(rse, regctl_eds, num_regs); + rse = kzalloc(rse_len, GFP_KERNEL); + if (!rse) + return -ENOMEM; + + ret = nvme_mpath_pr_resv_report_ns(ns, rse, rse_len, &eds); + if (ret) + goto free_rse; + + if (num_regs != get_unaligned_le16(&rse->regctl)) { + kfree(rse); + goto get_num_regs; + } + + resv->generation = le32_to_cpu(rse->gen); + resv->type = block_pr_type_from_nvme(rse->rtype); + + for (i = 0; i < num_regs; i++) { + if (eds) { + if (rse->regctl_eds[i].rcsts) { + resv->key = le64_to_cpu(rse->regctl_eds[i].rkey); + break; + } + } else { + struct nvme_reservation_status *rs; + + rs = (struct nvme_reservation_status *)rse; + if (rs->regctl_ds[i].rcsts) { + resv->key = le64_to_cpu(rs->regctl_ds[i].rkey); + break; + } + } + } + +free_rse: + kfree(rse); + return ret; +} + static int nvme_pr_read_reservation(struct block_device *bdev, struct pr_held_reservation *resv) { @@ -333,6 +580,73 @@ static int nvme_pr_read_reservation(struct block_device *bdev, return ret; } +#if defined(CONFIG_NVME_MULTIPATH) +static int nvme_mpath_pr_register(struct mpath_device *mpath_device, + u64 old_key, u64 new_key, unsigned int flags) +{ + struct nvme_ns *ns = nvme_mpath_to_ns(mpath_device); + + return nvme_pr_register_ns(ns, old_key, new_key, flags); +} + +static int nvme_mpath_pr_reserve(struct mpath_device *mpath_device, u64 key, + enum pr_type type, unsigned flags) +{ + struct nvme_ns *ns = nvme_mpath_to_ns(mpath_device); + + return nvme_pr_reserve_ns(ns, key, type, flags); +} + +static int nvme_mpath_pr_release(struct mpath_device *mpath_device, u64 key, + enum pr_type type) +{ + struct nvme_ns *ns = nvme_mpath_to_ns(mpath_device); + + return nvme_pr_release_ns(ns, key, type); +} + +static int nvme_mpath_pr_preempt(struct 
mpath_device *mpath_device, u64 old, + u64 new, enum pr_type type, bool abort) +{ + struct nvme_ns *ns = nvme_mpath_to_ns(mpath_device); + + return nvme_pr_preempt_ns(ns, old, new, type, abort); +} + +static int nvme_mpath_pr_clear(struct mpath_device *mpath_device, u64 key) +{ + struct nvme_ns *ns = nvme_mpath_to_ns(mpath_device); + + return nvme_pr_clear_ns(ns, key); +} + +static int nvme_mpath_pr_read_keys(struct mpath_device *mpath_device, + struct pr_keys *keys_info) +{ + struct nvme_ns *ns = nvme_mpath_to_ns(mpath_device); + + return nvme_pr_read_keys_ns(ns, keys_info); +} + +static int nvme_mpath_pr_read_reservation(struct mpath_device *mpath_device, + struct pr_held_reservation *resv) +{ + struct nvme_ns *ns = nvme_mpath_to_ns(mpath_device); + + return nvme_pr_read_reservation_ns(ns, resv); +} + +const struct mpath_pr_ops nvme_mpath_pr_ops = { + .pr_register = nvme_mpath_pr_register, + .pr_reserve = nvme_mpath_pr_reserve, + .pr_release = nvme_mpath_pr_release, + .pr_preempt = nvme_mpath_pr_preempt, + .pr_clear = nvme_mpath_pr_clear, + .pr_read_keys = nvme_mpath_pr_read_keys, + .pr_read_reservation = nvme_mpath_pr_read_reservation, +}; +#endif + const struct pr_ops nvme_pr_ops = { .pr_register = nvme_pr_register, .pr_reserve = nvme_pr_reserve, -- 2.43.5

