On 2021/05/20 15:10, Hannes Reinecke wrote:
[...]
>> +/*
>> + * First phase of BIO mapping for targets with zone append emulation:
>> + * check all BIO that change a zone writer pointer and change zone
>> + * append operations into regular write operations.
>> + */
>> +static bool dm_zone_map_bio_begin(struct mapped_device *md,
>> +                              struct bio *orig_bio, struct bio *clone)
>> +{
>> +    sector_t zone_sectors = blk_queue_zone_sectors(md->queue);
>> +    unsigned int zno = bio_zone_no(orig_bio);
>> +    unsigned long flags;
>> +    bool good_io = false;
>> +
>> +    spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> +
>> +    /*
>> +     * If the target zone is in an error state, recover by inspecting the
>> +     * zone to get its current write pointer position. Note that since the
>> +     * target zone is already locked, a BIO issuing context should never
>> +     * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
>> +     */
>> +    if (md->zwp_offset[zno] == DM_ZONE_INVALID_WP_OFST) {
>> +            unsigned int wp_offset;
>> +            int ret;
>> +
>> +            md->zwp_offset[zno] = DM_ZONE_UPDATING_WP_OFST;
>> +
>> +            spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
>> +            ret = dm_update_zone_wp_offset(md, zno, &wp_offset);
>> +            spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> +
>> +            if (ret) {
>> +                    md->zwp_offset[zno] = DM_ZONE_INVALID_WP_OFST;
>> +                    goto out;
>> +            }
>> +            md->zwp_offset[zno] = wp_offset;
>> +    } else if (md->zwp_offset[zno] == DM_ZONE_UPDATING_WP_OFST) {
>> +            DMWARN_LIMIT("Invalid DM_ZONE_UPDATING_WP_OFST state");
>> +            goto out;
>> +    }
>> +
>> +    switch (bio_op(orig_bio)) {
>> +    case REQ_OP_WRITE_ZEROES:
>> +    case REQ_OP_WRITE_SAME:
>> +    case REQ_OP_WRITE:
>> +            break;
>> +    case REQ_OP_ZONE_RESET:
>> +    case REQ_OP_ZONE_FINISH:
>> +            goto good;
>> +    case REQ_OP_ZONE_APPEND:
>> +            /*
>> +             * Change zone append operations into a non-mergeable regular
>> +             * writes directed at the current write pointer position of the
>> +             * target zone.
>> +             */
>> +            clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
>> +                    (orig_bio->bi_opf & (~REQ_OP_MASK));
>> +            clone->bi_iter.bi_sector =
>> +                    orig_bio->bi_iter.bi_sector + md->zwp_offset[zno];
>> +            break;
>> +    default:
>> +            DMWARN_LIMIT("Invalid BIO operation");
>> +            goto out;
>> +    }
>> +
>> +    /* Cannot write to a full zone */
>> +    if (md->zwp_offset[zno] >= zone_sectors)
>> +            goto out;
>> +
>> +    /* Writes must be aligned to the zone write pointer */
>> +    if ((clone->bi_iter.bi_sector & (zone_sectors - 1)) != 
>> md->zwp_offset[zno])
>> +            goto out;
>> +
>> +good:
>> +    good_io = true;
>> +
>> +out:
>> +    spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
> 
> I'm not happy with the spinlock. Can't the same effect be achieved with 
> some clever READ_ONCE()/WRITE_ONCE/cmpexch magic?
> Especially as you have a separate 'zone lock' mechanism ...

hmmm... Let me see. Given that what the bio completion is relatively simple, it
may be possible. With more coffee, I amy be able to come up with something 
clever.

> 
>> +
>> +    return good_io;
>> +}
>> +
>> +/*
>> + * Second phase of BIO mapping for targets with zone append emulation:
>> + * update the zone write pointer offset array to account for the additional
>> + * data written to a zone. Note that at this point, the remapped clone BIO
>> + * may already have completed, so we do not touch it.
>> + */
>> +static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
>> +                                    struct bio *orig_bio,
>> +                                    unsigned int nr_sectors)
>> +{
>> +    unsigned int zno = bio_zone_no(orig_bio);
>> +    blk_status_t sts = BLK_STS_OK;
>> +    unsigned long flags;
>> +
>> +    spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> +
>> +    /* Update the zone wp offset */
>> +    switch (bio_op(orig_bio)) {
>> +    case REQ_OP_ZONE_RESET:
>> +            md->zwp_offset[zno] = 0;
>> +            break;
>> +    case REQ_OP_ZONE_FINISH:
>> +            md->zwp_offset[zno] = blk_queue_zone_sectors(md->queue);
>> +            break;
>> +    case REQ_OP_WRITE_ZEROES:
>> +    case REQ_OP_WRITE_SAME:
>> +    case REQ_OP_WRITE:
>> +            md->zwp_offset[zno] += nr_sectors;
>> +            break;
>> +    case REQ_OP_ZONE_APPEND:
>> +            /*
>> +             * Check that the target did not truncate the write operation
>> +             * emulating a zone append.
>> +             */
>> +            if (nr_sectors != bio_sectors(orig_bio)) {
>> +                    DMWARN_LIMIT("Truncated write for zone append");
>> +                    sts = BLK_STS_IOERR;
>> +                    break;
>> +            }
>> +            md->zwp_offset[zno] += nr_sectors;
>> +            break;
>> +    default:
>> +            DMWARN_LIMIT("Invalid BIO operation");
>> +            sts = BLK_STS_IOERR;
>> +            break;
>> +    }
>> +
>> +    spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
> 
> You don't need the spinlock here; using WRITE_ONCE() should be sufficient.

If other references to zwp_offset use READ_ONCE(), no ?

[...]
>> +void dm_zone_endio(struct dm_io *io, struct bio *clone)
>> +{
>> +    struct mapped_device *md = io->md;
>> +    struct request_queue *q = md->queue;
>> +    struct bio *orig_bio = io->orig_bio;
>> +    unsigned long flags;
>> +    unsigned int zno;
>> +
>> +    /*
>> +     * For targets that do not emulate zone append, we only need to
>> +     * handle native zone-append bios.
>> +     */
>> +    if (!dm_emulate_zone_append(md)) {
>> +            /*
>> +             * Get the offset within the zone of the written sector
>> +             * and add that to the original bio sector position.
>> +             */
>> +            if (clone->bi_status == BLK_STS_OK &&
>> +                bio_op(clone) == REQ_OP_ZONE_APPEND) {
>> +                    sector_t mask = (sector_t)blk_queue_zone_sectors(q) - 1;
>> +
>> +                    orig_bio->bi_iter.bi_sector +=
>> +                            clone->bi_iter.bi_sector & mask;
>> +            }
>> +
>> +            return;
>> +    }
>> +
>> +    /*
>> +     * For targets that do emulate zone append, if the clone BIO does not
>> +     * own the target zone write lock, we have nothing to do.
>> +     */
>> +    if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
>> +            return;
>> +
>> +    zno = bio_zone_no(orig_bio);
>> +
>> +    spin_lock_irqsave(&md->zwp_offset_lock, flags);
>> +    if (clone->bi_status != BLK_STS_OK) {
>> +            /*
>> +             * BIOs that modify a zone write pointer may leave the zone
>> +             * in an unknown state in case of failure (e.g. the write
>> +             * pointer was only partially advanced). In this case, set
>> +             * the target zone write pointer as invalid unless it is
>> +             * already being updated.
>> +             */
>> +            if (md->zwp_offset[zno] != DM_ZONE_UPDATING_WP_OFST)
>> +                    md->zwp_offset[zno] = DM_ZONE_INVALID_WP_OFST;
>> +    } else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
>> +            /*
>> +             * Get the written sector for zone append operation that were
>> +             * emulated using regular write operations.
>> +             */
>> +            if (WARN_ON_ONCE(md->zwp_offset[zno] < bio_sectors(orig_bio)))
>> +                    md->zwp_offset[zno] = DM_ZONE_INVALID_WP_OFST;
>> +            else
>> +                    orig_bio->bi_iter.bi_sector +=
>> +                            md->zwp_offset[zno] - bio_sectors(orig_bio);
>> +    }
>> +    spin_unlock_irqrestore(&md->zwp_offset_lock, flags);
>> +
> 
> Similar comments to the spinlock here.

OK.

Thanks for the review.


-- 
Damien Le Moal
Western Digital Research



--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel

Reply via email to