----- Original Message -----
| This adds a full-fledged direct I/O implementation using the iomap
| interface. Full fledged in this case means all features are supported:
| AIO, vectored I/O, any iov_iter type including kernel pointers, bvecs
| and pipes, support for hole filling and async appending writes. It does
| not mean supporting all the warts of the old generic code. We expect
| i_rwsem to be held over the duration of the call, and we expect to
| maintain i_dio_count ourselves, and we pass on any kinds of mapping
| to the file system for now.
|
| The algorithm used is very simple: We use iomap_apply to iterate over
| the range of the I/O, and then we use the new bio_iov_iter_get_pages
| helper to lock down the user range for the size of the extent.
| bio_iov_iter_get_pages can currently lock down twice as many pages as
| the old direct I/O code did, which means that we will have a better
| batch factor for everything but overwrites of badly fragmented files.
|
| Signed-off-by: Christoph Hellwig <[email protected]>
| ---
(snip)
| +static blk_qc_t
| +iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
| + unsigned len)
| +{
| + struct page *page = ZERO_PAGE(0);
| + struct bio *bio;
| +
| + bio = bio_alloc(GFP_KERNEL, 1);
It's unlikely, but bio_alloc can return NULL; shouldn't the code be checking
for that?
| + bio->bi_bdev = iomap->bdev;
| + bio->bi_iter.bi_sector =
| + iomap->blkno + ((pos - iomap->offset) >> 9);
| + bio->bi_private = dio;
| + bio->bi_end_io = iomap_dio_bio_end_io;
| +
| + get_page(page);
| + if (bio_add_page(bio, page, len, 0) != len)
| + BUG();
| + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
| +
| + atomic_inc(&dio->ref);
| + return submit_bio(bio);
| +}
| +
| +static loff_t
| +iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
| + void *data, struct iomap *iomap)
| +{
| + struct iomap_dio *dio = data;
| + unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
| + unsigned fs_block_size = (1 << inode->i_blkbits), pad;
| + struct iov_iter iter = *dio->submit.iter;
| + struct bio *bio;
| + bool may_zero = false;
| + int nr_pages, ret;
| +
| + if ((pos | length | iov_iter_alignment(&iter)) & ((1 << blkbits) - 1))
| + return -EINVAL;
| +
| + switch (iomap->type) {
| + case IOMAP_HOLE:
| + /*
| + * We return -ENOTBLK to fall back to buffered I/O for file
| + * systems that can't fill holes from direct writes.
| + */
| + if (dio->flags & IOMAP_DIO_WRITE)
| + return -ENOTBLK;
| + /*FALLTHRU*/
| + case IOMAP_UNWRITTEN:
| + if (!(dio->flags & IOMAP_DIO_WRITE)) {
| + iov_iter_zero(length, dio->submit.iter);
| + dio->size += length;
| + return length;
| + }
| + dio->flags |= IOMAP_DIO_UNWRITTEN;
| + may_zero = true;
| + break;
| + case IOMAP_MAPPED:
| + if (iomap->flags & IOMAP_F_SHARED)
| + dio->flags |= IOMAP_DIO_COW;
| + if (iomap->flags & IOMAP_F_NEW)
| + may_zero = true;
| + break;
| + default:
| + WARN_ON_ONCE(1);
| + return -EIO;
| + }
| +
| + iov_iter_truncate(&iter, length);
| + nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
| + if (nr_pages <= 0)
| + return nr_pages;
| +
| + if (may_zero) {
| + pad = pos & (fs_block_size - 1);
| + if (pad)
| + iomap_dio_zero(dio, iomap, pos - pad, pad);
| + }
| +
| + do {
| + if (dio->error)
| + return 0;
| +
| + bio = bio_alloc(GFP_KERNEL, nr_pages);
Same here. Also: the code that follows is nearly identical; do you want to make
it a macro or inline function or something?
Regards,
Bob Peterson
Red Hat File Systems
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html