From: Christoph Hellwig <[email protected]>

Add a new fsync opcode, which either syncs a range if one is passed,
or the whole file if the offset and length fields are both cleared
to zero.  A flag is provided to use fdatasync semantics, that is, to only
force out the metadata which is required to retrieve the file data, but
not other metadata.

Signed-off-by: Christoph Hellwig <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c                 | 40 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  8 ++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 927d61ebecf3..6cdf11373304 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4,6 +4,7 @@
  * supporting fast/efficient IO.
  *
  * Copyright (C) 2018-2019 Jens Axboe
+ * Copyright (c) 2018-2019 Christoph Hellwig
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -518,6 +519,42 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
        return 0;
 }
 
+static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+                   bool force_nonblock)
+{ /* IORING_OP_FSYNC handler: the sync result is posted as a completion event */
+       struct io_ring_ctx *ctx = req->ctx;
+       loff_t sqe_off = READ_ONCE(sqe->off);
+       loff_t sqe_len = READ_ONCE(sqe->len);
+       loff_t end = sqe_off + sqe_len; /* end of the requested range */
+       unsigned fsync_flags;
+       struct file *file;
+       int ret, fd;
+
+       /* fsync always requires a blocking context, so bail out with -EAGAIN */
+       if (force_nonblock)
+               return -EAGAIN;
+
+       if (unlikely(sqe->addr || sqe->ioprio)) /* fields unused here must be zero */
+               return -EINVAL;
+
+       fsync_flags = READ_ONCE(sqe->fsync_flags);
+       if (unlikely(fsync_flags & ~IORING_FSYNC_DATASYNC)) /* unknown flag bits */
+               return -EINVAL;
+
+       fd = READ_ONCE(sqe->fd);
+       file = fget(fd);
+       if (unlikely(!file))
+               return -EBADF;
+
+       ret = vfs_fsync_range(file, sqe_off, end > 0 ? end : LLONG_MAX, /* end <= 0: sync up to LLONG_MAX */
+                               fsync_flags & IORING_FSYNC_DATASYNC); /* non-zero requests datasync */
+
+       fput(file); /* drop the reference taken by fget() */
+       io_cqring_add_event(ctx, sqe->user_data, ret, 0); /* sync result goes in the completion */
+       io_free_req(req);
+       return 0; /* the outcome of the sync itself is carried by the event above */
+}
+
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
                           const struct sqe_submit *s, bool force_nonblock)
 {
@@ -539,6 +576,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
        case IORING_OP_WRITEV:
                ret = io_write(req, s, force_nonblock);
                break;
+       case IORING_OP_FSYNC:
+               ret = io_fsync(req, s->sqe, force_nonblock);
+               break;
        default:
                ret = -EINVAL;
                break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ac692823d6f4..4589d56d0b68 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -24,7 +24,7 @@ struct io_uring_sqe {
        __u32   len;            /* buffer size or number of iovecs */
        union {
                __kernel_rwf_t  rw_flags;
-               __u32           __resv;
+               __u32           fsync_flags;
        };
        __u64   user_data;      /* data to be passed back at completion time */
        __u64   __pad2[3];
@@ -33,6 +33,12 @@ struct io_uring_sqe {
 #define IORING_OP_NOP          0
 #define IORING_OP_READV                1
 #define IORING_OP_WRITEV       2
+#define IORING_OP_FSYNC                3       /* sync a byte range, or the whole file if off and len are 0 */
+
+/*
+ * sqe->fsync_flags: flags for IORING_OP_FSYNC; unknown bits are rejected with -EINVAL
+ */
+#define IORING_FSYNC_DATASYNC  (1U << 0)       /* use fdatasync semantics */
 
 /*
  * IO completion data structure (Completion Queue Entry)
-- 
2.17.1

Reply via email to