tree 59acb15a73fa0dc4393a086fb83f016105d84b2a
parent 5a53368277efa2d80dd2206dddc1f4b19ef0c32a
author Miklos Szeredi <[EMAIL PROTECTED]> Sat, 10 Sep 2005 03:10:35 -0700
committer Linus Torvalds <[EMAIL PROTECTED]> Sat, 10 Sep 2005 04:03:46 -0700

[PATCH] FUSE - direct I/O

This patch adds support for the "direct_io" mount option of FUSE.

When this mount option is specified, the page cache is bypassed for
read and write operations.  This is useful, for example, if the
filesystem doesn't know the size of files before reading them, or when
any kind of caching is harmful.

Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]>
Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>

 fs/fuse/file.c   |  133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/fuse_i.h |    6 ++
 fs/fuse/inode.c  |    9 +++
 3 files changed, 146 insertions(+), 2 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -363,6 +363,118 @@ static int fuse_commit_write(struct file
        return err;
 }
 
+/*
+ * Release every user page attached to a request.
+ *
+ * When 'write' is non-zero, set_page_dirty_lock() is called on each
+ * page before its reference is dropped with put_page().
+ */
+static void fuse_release_user_pages(struct fuse_req *req, int write)
+{
+       struct page **pagep = req->pages;
+       struct page **end = pagep + req->num_pages;
+
+       for (; pagep != end; pagep++) {
+               if (write)
+                       set_page_dirty_lock(*pagep);
+               put_page(*pagep);
+       }
+}
+
+/*
+ * Attach the user pages backing 'buf' to 'req' via get_user_pages().
+ *
+ * At most FUSE_MAX_PAGES_PER_REQ pages are requested.  On success,
+ * req->num_pages is the number of pages get_user_pages() returned and
+ * req->page_offset is the offset of 'buf' inside its first page.
+ * 'write' is handed through to get_user_pages() unchanged.
+ *
+ * Returns 0 on success, -EPERM when the current task has no mm, or
+ * the negative value returned by get_user_pages().
+ */
+static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+                              unsigned nbytes, int write)
+{
+       unsigned long start = (unsigned long) buf;
+       unsigned in_page = start & ~PAGE_MASK;
+       int wanted;
+       int pinned;
+
+       /* This doesn't work with nfsd */
+       if (!current->mm)
+               return -EPERM;
+
+       /* Clamp the chunk so it never needs more pages than a request holds */
+       nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+       wanted = (nbytes + in_page + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       wanted = min(wanted, FUSE_MAX_PAGES_PER_REQ);
+
+       down_read(&current->mm->mmap_sem);
+       pinned = get_user_pages(current, current->mm, start, wanted, write,
+                               0, req->pages, NULL);
+       up_read(&current->mm->mmap_sem);
+
+       if (pinned < 0)
+               return pinned;
+
+       req->page_offset = in_page;
+       req->num_pages = pinned;
+       return 0;
+}
+
+/*
+ * Perform a read or write directly between the user buffer and the
+ * filesystem, without going through the page cache.
+ *
+ * @file:  file being read or written
+ * @buf:   user buffer (written to when 'write' is zero)
+ * @count: number of bytes requested
+ * @ppos:  file position; advanced only when some bytes were transferred
+ * @write: non-zero for a write, zero for a read
+ *
+ * The transfer is done in chunks of at most fc->max_write (write) or
+ * fc->max_read (read) bytes, each chunk further limited to what the
+ * attached user pages cover.  The loop ends when 'count' is exhausted,
+ * on a short transfer, or on the first error.
+ *
+ * Returns the number of bytes transferred.  A negative error is
+ * returned if the first chunk fails, -ERESTARTSYS if no request could
+ * be obtained, and -EIO whenever a reply reports more bytes than were
+ * requested.
+ */
+static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+                             size_t count, loff_t *ppos, int write)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       size_t nmax = write ? fc->max_write : fc->max_read;
+       loff_t pos = *ppos;
+       ssize_t res = 0;
+       struct fuse_req *req = fuse_get_request(fc);
+       if (!req)
+               return -ERESTARTSYS;
+
+       while (count) {
+               size_t tmp;
+               size_t nres;
+               size_t nbytes = min(count, nmax);
+               /* A file read stores into the user pages, hence !write */
+               int err = fuse_get_user_pages(req, buf, nbytes, !write);
+               if (err) {
+                       /*
+                        * Do not discard the byte count of chunks that
+                        * already completed; report the error only if
+                        * nothing was transferred (same rule as for a
+                        * reply error below).
+                        */
+                       if (!res)
+                               res = err;
+                       break;
+               }
+               /* Bytes actually covered by the pages that were attached */
+               tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+               nbytes = min(nbytes, tmp);
+               if (write)
+                       nres = fuse_send_write(req, file, inode, pos, nbytes);
+               else
+                       nres = fuse_send_read(req, file, inode, pos, nbytes);
+               fuse_release_user_pages(req, !write);
+               if (req->out.h.error) {
+                       if (!res)
+                               res = req->out.h.error;
+                       break;
+               } else if (nres > nbytes) {
+                       /* Reply claims more bytes than were requested */
+                       res = -EIO;
+                       break;
+               }
+               count -= nres;
+               res += nres;
+               pos += nres;
+               buf += nres;
+               /* Short transfer: stop and report what was done so far */
+               if (nres != nbytes)
+                       break;
+               if (count)
+                       fuse_reset_request(req);
+       }
+       fuse_put_request(fc, req);
+       if (res > 0) {
+               /* A write that went past the known size extends i_size */
+               if (write && pos > i_size_read(inode))
+                       i_size_write(inode, pos);
+               *ppos = pos;
+       } else if (write && (res == -EINTR || res == -EIO))
+               /*
+                * NOTE(review): presumably the write may still have
+                * reached the filesystem in these cases, so cached
+                * attributes are dropped -- confirm.
+                */
+               fuse_invalidate_attr(inode);
+
+       return res;
+}
+
+/* read() handler for direct_io files: fuse_direct_io() with write == 0 */
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+                                    size_t count, loff_t *ppos)
+{
+       const int is_write = 0;
+
+       return fuse_direct_io(file, buf, count, ppos, is_write);
+}
+
+/*
+ * write() handler for direct_io files: runs fuse_direct_io() with
+ * write == 1 while holding the inode's i_sem.
+ */
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+                                size_t count, loff_t *ppos)
+{
+       struct inode *target = file->f_dentry->d_inode;
+       ssize_t nwritten;
+
+       /* Don't allow parallel writes to the same file */
+       down(&target->i_sem);
+       nwritten = fuse_direct_io(file, buf, count, ppos, 1);
+       up(&target->i_sem);
+
+       return nwritten;
+}
+
 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        if ((vma->vm_flags & VM_SHARED)) {
@@ -393,6 +505,17 @@ static struct file_operations fuse_file_
        .sendfile       = generic_file_sendfile,
 };
 
+/* File operations installed on regular files of direct_io mounts */
+static struct file_operations fuse_direct_io_file_operations = {
+       /* no mmap and sendfile */
+       .open           = fuse_open,
+       .release        = fuse_release,
+       .flush          = fuse_flush,
+       .fsync          = fuse_fsync,
+       .llseek         = generic_file_llseek,
+       .read           = fuse_direct_read,
+       .write          = fuse_direct_write,
+};
+
 static struct address_space_operations fuse_file_aops  = {
        .readpage       = fuse_readpage,
        .prepare_write  = fuse_prepare_write,
@@ -403,6 +526,12 @@ static struct address_space_operations f
 
 void fuse_init_file_inode(struct inode *inode)
 {
-       inode->i_fop = &fuse_file_operations;
-       inode->i_data.a_ops = &fuse_file_aops;
+       struct fuse_conn *fc = get_fuse_conn(inode);
+
+       /* direct_io: only the file operations are set, no aops */
+       if (fc->flags & FUSE_DIRECT_IO) {
+               inode->i_fop = &fuse_direct_io_file_operations;
+               return;
+       }
+
+       /* Default: regular file operations plus address space operations */
+       inode->i_fop = &fuse_file_operations;
+       inode->i_data.a_ops = &fuse_file_aops;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -34,6 +34,9 @@
     be flushed on open */
 #define FUSE_KERNEL_CACHE        (1 << 2)
 
+/** Bypass the page cache for read and write operations (set by the
+    "direct_io" mount option) */
+#define FUSE_DIRECT_IO           (1 << 3)
+
 /** FUSE inode */
 struct fuse_inode {
        /** Inode data */
@@ -207,6 +210,9 @@ struct fuse_conn {
        /** Maximum read size */
        unsigned max_read;
 
+       /** Maximum write size */
+       unsigned max_write;
+
        /** Readers of the connection are waiting on this */
        wait_queue_head_t waitq;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -258,6 +258,7 @@ enum {
        OPT_DEFAULT_PERMISSIONS,
        OPT_ALLOW_OTHER,
        OPT_KERNEL_CACHE,
+       OPT_DIRECT_IO,
        OPT_MAX_READ,
        OPT_ERR
 };
@@ -270,6 +271,7 @@ static match_table_t tokens = {
        {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
        {OPT_ALLOW_OTHER,               "allow_other"},
        {OPT_KERNEL_CACHE,              "kernel_cache"},
+       {OPT_DIRECT_IO,                 "direct_io"},
        {OPT_MAX_READ,                  "max_read=%u"},
        {OPT_ERR,                       NULL}
 };
@@ -329,6 +331,10 @@ static int parse_fuse_opt(char *opt, str
                        d->flags |= FUSE_KERNEL_CACHE;
                        break;
 
+               case OPT_DIRECT_IO:
+                       d->flags |= FUSE_DIRECT_IO;
+                       break;
+
                case OPT_MAX_READ:
                        if (match_int(&args[0], &value))
                                return 0;
@@ -359,6 +365,8 @@ static int fuse_show_options(struct seq_
                seq_puts(m, ",allow_other");
        if (fc->flags & FUSE_KERNEL_CACHE)
                seq_puts(m, ",kernel_cache");
+       if (fc->flags & FUSE_DIRECT_IO)
+               seq_puts(m, ",direct_io");
        if (fc->max_read != ~0)
                seq_printf(m, ",max_read=%u", fc->max_read);
        return 0;
@@ -489,6 +497,7 @@ static int fuse_fill_super(struct super_
        fc->max_read = d.max_read;
        if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
                fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+       fc->max_write = FUSE_MAX_IN / 2;
 
        err = -ENOMEM;
        root = get_root_inode(sb, d.rootmode);
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to