[PATCH 0/1][RFC] mm: prepare_write positive return value

Dmitriy Monakhov Tue, 06 Feb 2007 00:36:08 -0800

Almost all read/write operations handle data in chunks (segments or pages),
and the result has cumulative ("integral") behaviour, as in the following scenario:
for_each_chunk() {
     res = op(....);
     if(IS_ERROR(res))
           return progress ? progress : res;
     progress += res;
}
prepare_write may have the same cumulative behaviour when blksize < pgsize:
for example, we successfully allocated/read some blocks, but not all of them,
and then some error happened. Currently we discard this progress by calling
vmtruncate() after prepare_write has failed.
It would be good to have the ability to signal this progress. Interpret a positive
prepare_write() return code as the number of bytes the fs is ready to handle at
this moment. I've asked SAW, and he thinks it is sane. This size is always less
than PAGE_CACHE_SIZE, so it is less than AOP_TRUNCATED_PAGE too.
 
BTW: This approach was used in the OpenVZ 2.6.9 kernel in order to make filesystems
with delayed allocation more correct, and it works well.
I think not everybody will be happy about this, but let's discuss all the advantages
and disadvantages of this change.


Signed-off-by: Dmitriy Monakhov <[EMAIL PROTECTED]>
-------------

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 62632f5..b4f6eac 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -239,7 +239,11 @@ static int do_lo_send_aops(struct loop_device *lo, struct 
bio_vec *bvec,
                                page_cache_release(page);
                                continue;
                        }
-                       goto unlock;
+                       if (ret > 0 && ret < PAGE_CACHE_SIZE)
+                       /* prepare_write demands limit size of bytes. */
+                               size = min(size, (unsigned)ret);
+                       else
+                               goto unlock;
                }
                transfer_result = lo_do_transfer(lo, WRITE, page, offset,
                                bvec->bv_page, bv_offs, size, IV);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 1e5d2ba..b5eebe8 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -454,6 +454,17 @@ static int ecryptfs_write_inode_size_to_header(struct file 
*lower_file,
        }
        lower_a_ops = lower_inode->i_mapping->a_ops;
        rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8);
+       if (unlikely(rc > 0 && rc < PAGE_CACHE_SIZE)) {
+       /* 
+        * prepare_write can handle less bytes whan we need. This is not likely
+        * to happend realy because usualy we need only one block. In order to
+        * preserve prepare/commit balanced invoke commit end fail.
+        */
+               int ret;
+               ret = lower_a_ops->commit_write(lower_file, header_page, 0, rc);
+               rc = ret ? ret : -ENOSPC;
+               goto unlock;
+       }
        file_size = (u64)i_size_read(inode);
        ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
        file_size = cpu_to_be64(file_size);
@@ -462,6 +473,7 @@ static int ecryptfs_write_inode_size_to_header(struct file 
*lower_file,
        kunmap_atomic(header_virt, KM_USER0);
        flush_dcache_page(header_page);
        rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
+unlock:
        if (rc < 0)
                ecryptfs_printk(KERN_ERR, "Error commiting header page "
                                "write\n");
diff --git a/fs/namei.c b/fs/namei.c
index b305589..723db81 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2704,6 +2704,16 @@ retry:
                page_cache_release(page);
                goto retry;
        }
+       if (unlikely(err > 0 && err < PAGE_CACHE_SIZE)) {
+       /* 
+        * prepare_write can handle less bytes whan we need. This is not likely
+        * to happend realy because usualy we need only one block. In order to
+        * preserve prepare/commit balanced invoke commit end fail.
+        */
+               int ret;
+               ret = mapping->a_ops->commit_write(NULL, page, 0, err);
+               err = ret ? ret : -ENOSPC;
+       }
        if (err)
                goto fail_map;
        kaddr = kmap_atomic(page, KM_USER0);
diff --git a/fs/splice.c b/fs/splice.c
index 2fca6eb..d2b92bf 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -652,6 +652,17 @@ find_page:
        if (unlikely(ret)) {
                loff_t isize = i_size_read(mapping->host);
 
+               if (ret > 0 && ret < PAGE_CACHE_SIZE) {
+               /* 
+                * prepare_write demands limit size of bytes. In order to
+                * preserve prepare/commit balanced invoke commit end fail. 
+                * Initial i_size saved, so vmtruncate safely restore it later.
+                */
+                       int ret2;
+                       ret2 = mapping->a_ops->commit_write(file, page, offset,
+                               offset + ret);
+                       ret = ret2 ? ret2 : -ENOSPC;
+               }
                if (ret != AOP_TRUNCATED_PAGE)
                        unlock_page(page);
                page_cache_release(page);
diff --git a/mm/filemap.c b/mm/filemap.c
index 5fe315a..529eb9e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2188,6 +2188,11 @@ generic_file_buffered_write(struct kiocb *iocb, const 
struct iovec *iov,
                }
 
                status = a_ops->prepare_write(file, page, offset, offset+bytes);
+               if (unlikely(status > 0 && status < PAGE_CACHE_SIZE)) {
+               /* prepare_write demands limit size of bytes at this 
iteration.*/
+                       bytes = min(bytes, (size_t)status);
+                       status = 0;
+               }
                if (unlikely(status)) {
                        loff_t isize = i_size_read(inode);

[PATCH 0/1][RFC] mm: prepare_write positive return value

Reply via email to