On Wed, May 08, 2024 at 05:36:25PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
> 
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu <luchangqi....@bytedance.com>
> Signed-off-by: zhenwei pi <pizhen...@bytedance.com>
> ---
>  hw/scsi/scsi-disk.c | 302 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 302 insertions(+)

Paolo: Please review for buffer overflows. I find the buffer size
assumption in the SCSI layer mysterious (e.g. scsi_req_get_buf() returns
a void* and it's not obvious how large the buffer is), so I didn't check
for buffer overflows.

> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..bdd66c4026 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/scsi/emulation.h"
>  #include "scsi/constants.h"
> +#include "scsi/utils.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
> @@ -1474,6 +1475,296 @@ static void scsi_disk_emulate_read_data(SCSIRequest 
> *req)
>      scsi_req_complete(&r->req, GOOD);
>  }
>  
> +typedef struct SCSIPrReadKeys {
> +    uint32_t generation;
> +    uint32_t num_keys;
> +    uint64_t *keys;
> +    void     *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> +    uint32_t generation;
> +    uint64_t key;
> +    BlockPrType type;
> +    void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> +    int num_keys;
> +    uint8_t *buf;
> +    SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    num_keys = MIN(blk_keys->num_keys, ret);
> +    blk_keys->generation = cpu_to_be32(blk_keys->generation);
> +    memcpy(&buf[0], &blk_keys->generation, 4);
> +    for (int i = 0; i < num_keys; i++) {
> +        blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> +        memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
> +    }
> +    num_keys = cpu_to_be32(num_keys * 8);
> +    memcpy(&buf[4], &num_keys, 4);
> +
> +    scsi_req_data(&r->req, r->buflen);
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_keys->keys);
> +    g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> +    SCSIPrReadKeys *blk_keys;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    int buflen = MIN(r->req.cmd.xfer, r->buflen);
> +    int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> +    blk_keys = g_new0(SCSIPrReadKeys, 1);
> +    blk_keys->generation = 0;
> +    /* num_keys is the maximum number of keys that can be transmitted */
> +    blk_keys->num_keys = num_keys;
> +    blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> +    blk_keys->req = r;
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, 
> &blk_keys->generation,
> +                                        blk_keys->num_keys, blk_keys->keys,
> +                                        scsi_pr_read_keys_complete, 
> blk_keys);
> +    return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> +    uint8_t *buf;
> +    uint32_t num_keys = 0;
> +    SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> +    memcpy(&buf[0], &blk_rsv->generation, 4);
> +    if (ret) {
> +        num_keys = cpu_to_be32(16);

The SPC-6 calls this field Additional Length, which is clearer than
num_keys (there is only 1 key here!). Could you rename it to
additional_len to avoid confusion?

> +        blk_rsv->key = cpu_to_be64(blk_rsv->key);
> +        memcpy(&buf[8], &blk_rsv->key, 8);
> +        buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> +    } else {
> +        num_keys = cpu_to_be32(0);
> +    }
> +
> +    memcpy(&buf[4], &num_keys, 4);
> +    scsi_req_data(&r->req, r->buflen);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_rsv);
> +}
> +
> +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
> +{
> +    SCSIPrReadReservation *blk_rsv;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    blk_rsv = g_malloc(sizeof(*blk_rsv));
> +    blk_rsv->generation = 0;
> +    blk_rsv->key = 0;
> +    blk_rsv->type = 0;
> +    blk_rsv->req = r;
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
> +                   &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
> +                   scsi_pr_read_reservation_complete, blk_rsv);
> +    return 0;
> +}
> +
> +static void scsi_aio_pr_complete(void *opaque, int ret)
> +{
> +    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    /* The request must only run in the BlockBackend's AioContext */
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +           qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    scsi_req_complete(&r->req, GOOD);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +}
> +
> +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t old_key,
> +                                         uint64_t new_key, SCSIPrType type,
> +                                         bool ignore_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, old_key, new_key,
> +                                       scsi_pr_type_to_block(type),
> +                                       ignore_key, scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, key,
> +                                    scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t new_key,
> +                                        uint64_t old_key, SCSIPrType type,
> +                                        bool abort)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, new_key, old_key,
> +                                      scsi_pr_type_to_block(type), abort,
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_in(SCSIRequest *req)
> +{
> +    int rc;
> +    SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
> +
> +    switch (action) {
> +    case SCSI_PR_IN_READ_KEYS:
> +        rc = scsi_disk_emulate_pr_read_keys(req);
> +        break;
> +    case SCSI_PR_IN_READ_RESERVATION:
> +        rc = scsi_disk_emulate_pr_read_reservation(req);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
> +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
> +{
> +    int rc;
> +    uint64_t old_key, new_key;
> +    SCSIPrOutAction action;
> +    SCSIPrScope scope;
> +    SCSIPrType type;
> +    SCSIRequest *req = &r->req;
> +
> +    memcpy(&old_key, &inbuf[0], 8);
> +    old_key = be64_to_cpu(old_key);
> +    memcpy(&new_key, &inbuf[8], 8);
> +    new_key = be64_to_cpu(new_key);
> +    action = req->cmd.buf[1] & 0x1f;
> +    scope = (req->cmd.buf[2] >> 4) & 0x0f;
> +    type = req->cmd.buf[2] & 0x0f;
> +
> +    if (scope != SCSI_PR_LU_SCOPE) {
> +        return -ENOTSUP;
> +    }
> +
> +    switch (action) {
> +    case SCSI_PR_OUT_REGISTER:
> +        rc = scsi_disk_emulate_pr_register(r, old_key, new_key, type, false);
> +        break;
> +    case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
> +        rc = scsi_disk_emulate_pr_register(r, old_key, new_key, type, true);
> +        break;
> +    case SCSI_PR_OUT_RESERVE:
> +        rc = scsi_disk_emulate_pr_reserve(r, old_key, type);
> +        break;
> +    case SCSI_PR_OUT_RELEASE:
> +        rc = scsi_disk_emulate_pr_release(r, old_key, type);
> +        break;
> +    case SCSI_PR_OUT_CLEAR:
> +        rc = scsi_disk_emulate_pr_clear(r, old_key);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT:
> +        rc = scsi_disk_emulate_pr_preempt(r, old_key, new_key, type, false);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT_AND_ABORT:
> +        rc = scsi_disk_emulate_pr_preempt(r, old_key, new_key, type, true);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
>  static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
>                                         uint8_t *inbuf, int inlen)
>  {
> @@ -1957,6 +2248,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest 
> *req)
>          scsi_req_complete(&r->req, GOOD);
>          break;
>  
> +    case PERSISTENT_RESERVE_OUT:
> +        scsi_disk_emulate_pr_out(r, r->iov.iov_base);
> +        break;
>      default:
>          abort();
>      }
> @@ -2213,6 +2507,12 @@ static int32_t scsi_disk_emulate_command(SCSIRequest 
> *req, uint8_t *buf)
>      case FORMAT_UNIT:
>          trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
>          break;
> +    case PERSISTENT_RESERVE_OUT:
> +        break;
> +    case PERSISTENT_RESERVE_IN:
> +        scsi_req_ref(&r->req);

Please add a comment indicating which scsi_req_unref() this ref is
paired with. That makes it easier to understand request reference
counting.

> +        scsi_disk_emulate_pr_in(req);
> +        return 0;
>      default:
>          trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
>                                                  scsi_command_name(buf[0]));
> @@ -2632,6 +2932,8 @@ static const SCSIReqOps *const 
> scsi_disk_reqops_dispatch[256] = {
>      [VERIFY_12]                       = &scsi_disk_emulate_reqops,
>      [VERIFY_16]                       = &scsi_disk_emulate_reqops,
>      [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_IN]           = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_OUT]          = &scsi_disk_emulate_reqops,
>  
>      [READ_6]                          = &scsi_disk_dma_reqops,
>      [READ_10]                         = &scsi_disk_dma_reqops,
> -- 
> 2.20.1
> 

Attachment: signature.asc
Description: PGP signature

Reply via email to